{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "6a00efd9",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Collecting requests\n",
      "  Downloading requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)\n",
      "Collecting charset-normalizer<4,>=2 (from requests)\n",
      "  Downloading charset_normalizer-3.4.1-cp310-cp310-win_amd64.whl.metadata (36 kB)\n",
      "Collecting idna<4,>=2.5 (from requests)\n",
      "  Downloading idna-3.10-py3-none-any.whl.metadata (10 kB)\n",
      "Collecting urllib3<3,>=1.21.1 (from requests)\n",
      "  Downloading urllib3-2.4.0-py3-none-any.whl.metadata (6.5 kB)\n",
      "Collecting certifi>=2017.4.17 (from requests)\n",
      "  Downloading certifi-2025.1.31-py3-none-any.whl.metadata (2.5 kB)\n",
      "Downloading requests-2.32.3-py3-none-any.whl (64 kB)\n",
      "Downloading certifi-2025.1.31-py3-none-any.whl (166 kB)\n",
      "Downloading charset_normalizer-3.4.1-cp310-cp310-win_amd64.whl (102 kB)\n",
      "Downloading idna-3.10-py3-none-any.whl (70 kB)\n",
      "Downloading urllib3-2.4.0-py3-none-any.whl (128 kB)\n",
      "Installing collected packages: urllib3, idna, charset-normalizer, certifi, requests\n",
      "Successfully installed certifi-2025.1.31 charset-normalizer-3.4.1 idna-3.10 requests-2.32.3 urllib3-2.4.0\n"
     ]
    }
   ],
   "source": [
    "!pip install requests"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "42cdddab",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Collecting spacy\n",
      "  Downloading spacy-3.8.5-cp310-cp310-win_amd64.whl.metadata (28 kB)\n",
      "Collecting spacy-legacy<3.1.0,>=3.0.11 (from spacy)\n",
      "  Downloading spacy_legacy-3.0.12-py2.py3-none-any.whl.metadata (2.8 kB)\n",
      "Collecting spacy-loggers<2.0.0,>=1.0.0 (from spacy)\n",
      "  Downloading spacy_loggers-1.0.5-py3-none-any.whl.metadata (23 kB)\n",
      "Collecting murmurhash<1.1.0,>=0.28.0 (from spacy)\n",
      "  Downloading murmurhash-1.0.12-cp310-cp310-win_amd64.whl.metadata (2.2 kB)\n",
      "Collecting cymem<2.1.0,>=2.0.2 (from spacy)\n",
      "  Downloading cymem-2.0.11-cp310-cp310-win_amd64.whl.metadata (8.8 kB)\n",
      "Collecting preshed<3.1.0,>=3.0.2 (from spacy)\n",
      "  Downloading preshed-3.0.9-cp310-cp310-win_amd64.whl.metadata (2.2 kB)\n",
      "Collecting thinc<8.4.0,>=8.3.4 (from spacy)\n",
      "  Downloading thinc-8.3.6-cp310-cp310-win_amd64.whl.metadata (15 kB)\n",
      "Collecting wasabi<1.2.0,>=0.9.1 (from spacy)\n",
      "  Downloading wasabi-1.1.3-py3-none-any.whl.metadata (28 kB)\n",
      "Collecting srsly<3.0.0,>=2.4.3 (from spacy)\n",
      "  Downloading srsly-2.5.1-cp310-cp310-win_amd64.whl.metadata (20 kB)\n",
      "Collecting catalogue<2.1.0,>=2.0.6 (from spacy)\n",
      "  Downloading catalogue-2.0.10-py3-none-any.whl.metadata (14 kB)\n",
      "Collecting weasel<0.5.0,>=0.1.0 (from spacy)\n",
      "  Downloading weasel-0.4.1-py3-none-any.whl.metadata (4.6 kB)\n",
      "Collecting typer<1.0.0,>=0.3.0 (from spacy)\n",
      "  Downloading typer-0.15.2-py3-none-any.whl.metadata (15 kB)\n",
      "Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in d:\\python310\\lib\\site-packages (from spacy) (4.67.1)\n",
      "Requirement already satisfied: numpy>=1.19.0 in d:\\python310\\lib\\site-packages (from spacy) (1.23.5)\n",
      "Requirement already satisfied: requests<3.0.0,>=2.13.0 in d:\\python310\\lib\\site-packages (from spacy) (2.32.3)\n",
      "Collecting pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4 (from spacy)\n",
      "  Downloading pydantic-2.11.3-py3-none-any.whl.metadata (65 kB)\n",
      "Requirement already satisfied: jinja2 in d:\\python310\\lib\\site-packages (from spacy) (3.1.4)\n",
      "Requirement already satisfied: setuptools in d:\\python310\\lib\\site-packages (from spacy) (58.1.0)\n",
      "Requirement already satisfied: packaging>=20.0 in c:\\users\\admin\\appdata\\roaming\\python\\python310\\site-packages (from spacy) (24.2)\n",
      "Collecting langcodes<4.0.0,>=3.2.0 (from spacy)\n",
      "  Downloading langcodes-3.5.0-py3-none-any.whl.metadata (29 kB)\n",
      "Collecting language-data>=1.2 (from langcodes<4.0.0,>=3.2.0->spacy)\n",
      "  Downloading language_data-1.3.0-py3-none-any.whl.metadata (4.3 kB)\n",
      "Collecting annotated-types>=0.6.0 (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy)\n",
      "  Downloading annotated_types-0.7.0-py3-none-any.whl.metadata (15 kB)\n",
      "Collecting pydantic-core==2.33.1 (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy)\n",
      "  Downloading pydantic_core-2.33.1-cp310-cp310-win_amd64.whl.metadata (6.9 kB)\n",
      "Requirement already satisfied: typing-extensions>=4.12.2 in c:\\users\\admin\\appdata\\roaming\\python\\python310\\site-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy) (4.13.2)\n",
      "Collecting typing-inspection>=0.4.0 (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy)\n",
      "  Downloading typing_inspection-0.4.0-py3-none-any.whl.metadata (2.6 kB)\n",
      "Requirement already satisfied: charset-normalizer<4,>=2 in d:\\python310\\lib\\site-packages (from requests<3.0.0,>=2.13.0->spacy) (3.4.1)\n",
      "Requirement already satisfied: idna<4,>=2.5 in d:\\python310\\lib\\site-packages (from requests<3.0.0,>=2.13.0->spacy) (3.10)\n",
      "Requirement already satisfied: urllib3<3,>=1.21.1 in d:\\python310\\lib\\site-packages (from requests<3.0.0,>=2.13.0->spacy) (2.4.0)\n",
      "Requirement already satisfied: certifi>=2017.4.17 in d:\\python310\\lib\\site-packages (from requests<3.0.0,>=2.13.0->spacy) (2025.1.31)\n",
      "Collecting blis<1.4.0,>=1.3.0 (from thinc<8.4.0,>=8.3.4->spacy)\n",
      "  Downloading blis-1.3.0-cp310-cp310-win_amd64.whl.metadata (7.6 kB)\n",
      "Collecting confection<1.0.0,>=0.0.1 (from thinc<8.4.0,>=8.3.4->spacy)\n",
      "  Downloading confection-0.1.5-py3-none-any.whl.metadata (19 kB)\n",
      "Collecting numpy>=1.19.0 (from spacy)\n",
      "  Downloading numpy-2.2.5-cp310-cp310-win_amd64.whl.metadata (60 kB)\n",
      "Requirement already satisfied: colorama in c:\\users\\admin\\appdata\\roaming\\python\\python310\\site-packages (from tqdm<5.0.0,>=4.38.0->spacy) (0.4.6)\n",
      "Requirement already satisfied: click>=8.0.0 in d:\\python310\\lib\\site-packages (from typer<1.0.0,>=0.3.0->spacy) (8.1.8)\n",
      "Collecting shellingham>=1.3.0 (from typer<1.0.0,>=0.3.0->spacy)\n",
      "  Downloading shellingham-1.5.4-py2.py3-none-any.whl.metadata (3.5 kB)\n",
      "Collecting rich>=10.11.0 (from typer<1.0.0,>=0.3.0->spacy)\n",
      "  Downloading rich-14.0.0-py3-none-any.whl.metadata (18 kB)\n",
      "Collecting cloudpathlib<1.0.0,>=0.7.0 (from weasel<0.5.0,>=0.1.0->spacy)\n",
      "  Downloading cloudpathlib-0.21.0-py3-none-any.whl.metadata (14 kB)\n",
      "Requirement already satisfied: smart-open<8.0.0,>=5.2.1 in d:\\python310\\lib\\site-packages (from weasel<0.5.0,>=0.1.0->spacy) (7.1.0)\n",
      "Requirement already satisfied: MarkupSafe>=2.0 in d:\\python310\\lib\\site-packages (from jinja2->spacy) (2.1.5)\n",
      "Collecting marisa-trie>=1.1.0 (from language-data>=1.2->langcodes<4.0.0,>=3.2.0->spacy)\n",
      "  Downloading marisa_trie-1.2.1-cp310-cp310-win_amd64.whl.metadata (9.3 kB)\n",
      "Collecting markdown-it-py>=2.2.0 (from rich>=10.11.0->typer<1.0.0,>=0.3.0->spacy)\n",
      "  Downloading markdown_it_py-3.0.0-py3-none-any.whl.metadata (6.9 kB)\n",
      "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in c:\\users\\admin\\appdata\\roaming\\python\\python310\\site-packages (from rich>=10.11.0->typer<1.0.0,>=0.3.0->spacy) (2.19.1)\n",
      "Requirement already satisfied: wrapt in d:\\python310\\lib\\site-packages (from smart-open<8.0.0,>=5.2.1->weasel<0.5.0,>=0.1.0->spacy) (1.17.2)\n",
      "Collecting mdurl~=0.1 (from markdown-it-py>=2.2.0->rich>=10.11.0->typer<1.0.0,>=0.3.0->spacy)\n",
      "  Downloading mdurl-0.1.2-py3-none-any.whl.metadata (1.6 kB)\n",
      "Downloading spacy-3.8.5-cp310-cp310-win_amd64.whl (12.2 MB)\n",
      "   ---------------------------------------- 0.0/12.2 MB ? eta -:--:--\n",
      "   -- ------------------------------------- 0.8/12.2 MB 6.7 MB/s eta 0:00:02\n",
      "   -------------------- ------------------- 6.3/12.2 MB 19.3 MB/s eta 0:00:01\n",
      "   ---------------------------------------- 12.2/12.2 MB 25.5 MB/s eta 0:00:00\n",
      "Downloading catalogue-2.0.10-py3-none-any.whl (17 kB)\n",
      "Downloading cymem-2.0.11-cp310-cp310-win_amd64.whl (39 kB)\n",
      "Downloading langcodes-3.5.0-py3-none-any.whl (182 kB)\n",
      "Downloading murmurhash-1.0.12-cp310-cp310-win_amd64.whl (25 kB)\n",
      "Downloading preshed-3.0.9-cp310-cp310-win_amd64.whl (122 kB)\n",
      "Downloading pydantic-2.11.3-py3-none-any.whl (443 kB)\n",
      "Downloading pydantic_core-2.33.1-cp310-cp310-win_amd64.whl (2.0 MB)\n",
      "   ---------------------------------------- 0.0/2.0 MB ? eta -:--:--\n",
      "   ---------------------------------------- 2.0/2.0 MB 36.0 MB/s eta 0:00:00\n",
      "Downloading spacy_legacy-3.0.12-py2.py3-none-any.whl (29 kB)\n",
      "Downloading spacy_loggers-1.0.5-py3-none-any.whl (22 kB)\n",
      "Downloading srsly-2.5.1-cp310-cp310-win_amd64.whl (632 kB)\n",
      "   ---------------------------------------- 0.0/632.3 kB ? eta -:--:--\n",
      "   --------------------------------------- 632.3/632.3 kB 23.1 MB/s eta 0:00:00\n",
      "Downloading thinc-8.3.6-cp310-cp310-win_amd64.whl (1.8 MB)\n",
      "   ---------------------------------------- 0.0/1.8 MB ? eta -:--:--\n",
      "   ---------------------------------------- 1.8/1.8 MB 48.8 MB/s eta 0:00:00\n",
      "Downloading numpy-2.2.5-cp310-cp310-win_amd64.whl (12.9 MB)\n",
      "   ---------------------------------------- 0.0/12.9 MB ? eta -:--:--\n",
      "   -------------------------------- ------- 10.5/12.9 MB 50.4 MB/s eta 0:00:01\n",
      "   ---------------------------------------- 12.9/12.9 MB 45.1 MB/s eta 0:00:00\n",
      "Downloading typer-0.15.2-py3-none-any.whl (45 kB)\n",
      "Downloading wasabi-1.1.3-py3-none-any.whl (27 kB)\n",
      "Downloading weasel-0.4.1-py3-none-any.whl (50 kB)\n",
      "Downloading annotated_types-0.7.0-py3-none-any.whl (13 kB)\n",
      "Downloading blis-1.3.0-cp310-cp310-win_amd64.whl (6.2 MB)\n",
      "   ---------------------------------------- 0.0/6.2 MB ? eta -:--:--\n",
      "   ---------------------------------------- 6.2/6.2 MB 42.5 MB/s eta 0:00:00\n",
      "Downloading cloudpathlib-0.21.0-py3-none-any.whl (52 kB)\n",
      "Downloading confection-0.1.5-py3-none-any.whl (35 kB)\n",
      "Downloading language_data-1.3.0-py3-none-any.whl (5.4 MB)\n",
      "   ---------------------------------------- 0.0/5.4 MB ? eta -:--:--\n",
      "   ---------------------------------------- 5.4/5.4 MB 41.0 MB/s eta 0:00:00\n",
      "Downloading rich-14.0.0-py3-none-any.whl (243 kB)\n",
      "Downloading shellingham-1.5.4-py2.py3-none-any.whl (9.8 kB)\n",
      "Downloading typing_inspection-0.4.0-py3-none-any.whl (14 kB)\n",
      "Downloading marisa_trie-1.2.1-cp310-cp310-win_amd64.whl (151 kB)\n",
      "Downloading markdown_it_py-3.0.0-py3-none-any.whl (87 kB)\n",
      "Downloading mdurl-0.1.2-py3-none-any.whl (10.0 kB)\n",
      "Installing collected packages: cymem, wasabi, typing-inspection, spacy-loggers, spacy-legacy, shellingham, pydantic-core, numpy, murmurhash, mdurl, marisa-trie, cloudpathlib, catalogue, annotated-types, srsly, pydantic, preshed, markdown-it-py, language-data, blis, rich, langcodes, confection, typer, thinc, weasel, spacy\n",
      "  Attempting uninstall: numpy\n",
      "    Found existing installation: numpy 1.23.5\n",
      "    Uninstalling numpy-1.23.5:\n",
      "      Successfully uninstalled numpy-1.23.5\n",
      "Successfully installed annotated-types-0.7.0 blis-1.3.0 catalogue-2.0.10 cloudpathlib-0.21.0 confection-0.1.5 cymem-2.0.11 langcodes-3.5.0 language-data-1.3.0 marisa-trie-1.2.1 markdown-it-py-3.0.0 mdurl-0.1.2 murmurhash-1.0.12 numpy-2.2.5 preshed-3.0.9 pydantic-2.11.3 pydantic-core-2.33.1 rich-14.0.0 shellingham-1.5.4 spacy-3.8.5 spacy-legacy-3.0.12 spacy-loggers-1.0.5 srsly-2.5.1 thinc-8.3.6 typer-0.15.2 typing-inspection-0.4.0 wasabi-1.1.3 weasel-0.4.1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
      "gensim 4.3.3 requires numpy<2.0,>=1.18.5, but you have numpy 2.2.5 which is incompatible.\n"
     ]
    }
   ],
   "source": [
    "!pip install spacy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "adede525",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Collecting zh-core-web-sm==3.8.0\n",
      "  Downloading https://github.com/explosion/spacy-models/releases/download/zh_core_web_sm-3.8.0/zh_core_web_sm-3.8.0-py3-none-any.whl (48.5 MB)\n",
      "     ---------------------------------------- 0.0/48.5 MB ? eta -:--:--\n",
      "     ---------------------------------------- 0.0/48.5 MB ? eta -:--:--\n",
      "      --------------------------------------- 1.0/48.5 MB 5.0 MB/s eta 0:00:10\n",
      "     ----- ---------------------------------- 6.8/48.5 MB 17.5 MB/s eta 0:00:03\n",
      "     -------- ------------------------------ 10.5/48.5 MB 22.6 MB/s eta 0:00:02\n",
      "     ---------------- ---------------------- 20.2/48.5 MB 25.5 MB/s eta 0:00:02\n",
      "     ---------------------- ---------------- 28.6/48.5 MB 28.3 MB/s eta 0:00:01\n",
      "     ---------------------------- ---------- 35.4/48.5 MB 29.2 MB/s eta 0:00:01\n",
      "     ----------------------------------- --- 43.8/48.5 MB 31.0 MB/s eta 0:00:01\n",
      "     --------------------------------------- 48.5/48.5 MB 30.9 MB/s eta 0:00:00\n",
      "Collecting spacy-pkuseg<2.0.0,>=1.0.0 (from zh-core-web-sm==3.8.0)\n",
      "  Downloading spacy_pkuseg-1.0.0-cp310-cp310-win_amd64.whl.metadata (13 kB)\n",
      "Requirement already satisfied: srsly<3.0.0,>=2.3.0 in d:\\python310\\lib\\site-packages (from spacy-pkuseg<2.0.0,>=1.0.0->zh-core-web-sm==3.8.0) (2.5.1)\n",
      "Requirement already satisfied: numpy<3.0.0,>=2.0.0 in d:\\python310\\lib\\site-packages (from spacy-pkuseg<2.0.0,>=1.0.0->zh-core-web-sm==3.8.0) (2.2.5)\n",
      "Requirement already satisfied: catalogue<2.1.0,>=2.0.3 in d:\\python310\\lib\\site-packages (from srsly<3.0.0,>=2.3.0->spacy-pkuseg<2.0.0,>=1.0.0->zh-core-web-sm==3.8.0) (2.0.10)\n",
      "Downloading spacy_pkuseg-1.0.0-cp310-cp310-win_amd64.whl (2.4 MB)\n",
      "   ---------------------------------------- 0.0/2.4 MB ? eta -:--:--\n",
      "   ---------------------------------------- 0.0/2.4 MB ? eta -:--:--\n",
      "   ----------------- ---------------------- 1.0/2.4 MB 5.0 MB/s eta 0:00:01\n",
      "   ---------------------------------------- 2.4/2.4 MB 7.7 MB/s eta 0:00:00\n",
      "Installing collected packages: spacy-pkuseg, zh-core-web-sm\n",
      "Successfully installed spacy-pkuseg-1.0.0 zh-core-web-sm-3.8.0\n",
      "\u001b[38;5;2m✔ Download and installation successful\u001b[0m\n",
      "You can now load the package via spacy.load('zh_core_web_sm')\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  WARNING: Retrying (Retry(total=4, connect=None, read=None, redirect=None, status=None)) after connection broken by 'ReadTimeoutError(\"HTTPSConnectionPool(host='github.com', port=443): Read timed out. (read timeout=15)\")': /explosion/spacy-models/releases/download/zh_core_web_sm-3.8.0/zh_core_web_sm-3.8.0-py3-none-any.whl\n"
     ]
    }
   ],
   "source": [
    "!python -m spacy download zh_core_web_sm"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "81a6f12e",
   "metadata": {},
   "source": [
    "下面几段代码展示朴素贝叶斯模型的训练和预测。这里使用的数据集为本书自制的Books数据集，包含约1万本图书的标题，分为3种主题。首先是预处理，针对文本分类的预处理主要包含以下步骤：\n",
    "\n",
    "- 通常可以将英文文本全部转换为小写，或者将中文内容全部转换为简体，等等，这一般不会改变文本内容。\n",
    "- 去除标点。英文中的标点符号和单词之间没有空格（如——“Hi, there!”），如果不去除标点，“Hi,”和“there!”会被识别为不同于“Hi”和“there”的两个词，这显然是不合理的。对于中文，移除标点一般也不会影响文本的内容。\n",
    "- 分词。中文汉字之间没有空格分隔，中文分词有时比英文分词更加困难，此处不再赘述。\n",
    "- 去除停用词（如“I”、“is”、“的”等）。这些词往往大量出现但没有具体含义。\n",
    "- 建立词表。通常会忽略语料库中频率非常低的词。\n",
    "- 将词转换为词表索引（ID），便于机器学习模型使用。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "27cad9a7-ab9d-4503-9603-f18f3c7cb0d8",
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import os\n",
    "import requests\n",
    "import re\n",
    "from tqdm import tqdm\n",
    "from collections import defaultdict\n",
    "from string import punctuation\n",
    "import spacy\n",
    "from spacy.lang.zh.stop_words import STOP_WORDS\n",
    "nlp = spacy.load('zh_core_web_sm')\n",
    "# python -m spacy download zh_core_web_sm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "5936ceb0",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "train size = 2157 , test size = 8627\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 2157/2157 [00:06<00:00, 315.87it/s]\n",
      "100%|██████████| 8627/8627 [00:25<00:00, 333.49it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['新', 'ip', '面向', '泛在', '全', '场景', '未来', '数据', '网络']\n",
      "{'计算机类': 0, '艺术传媒类': 1, '经管类': 2}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "class BooksDataset:\n",
    "    def __init__(self):\n",
    "        train_file, test_file = r\"D:\\Desktop\\社会舆情分析\\@Hands-on-NLP-main\\test.jsonl\",r\"D:\\Desktop\\社会舆情分析\\@Hands-on-NLP-main\\train.jsonl\"\n",
    "\n",
    "        # 下载数据为JSON格式，转化为Python对象\n",
    "        def read_file(file_name):\n",
    "            with open(file_name, 'r', encoding='utf-8') as fin:\n",
    "                json_list = list(fin)\n",
    "            data_split = []\n",
    "            for json_str in json_list:\n",
    "                data_split.append(json.loads(json_str))\n",
    "            return data_split\n",
    "\n",
    "        self.train_data, self.test_data = read_file(train_file),\\\n",
    "            read_file(test_file)\n",
    "        print('train size =', len(self.train_data), \n",
    "              ', test size =', len(self.test_data))\n",
    "        \n",
    "        # 建立文本标签和数字标签的映射\n",
    "        self.label2id, self.id2label = {}, {}\n",
    "        for data_split in [self.train_data, self.test_data]:\n",
    "            for data in data_split:\n",
    "                txt = data['class']\n",
    "                if txt not in self.label2id:\n",
    "                    idx = len(self.label2id)\n",
    "                    self.label2id[txt] = idx\n",
    "                    self.id2label[idx] = txt\n",
    "                label_id = self.label2id[txt]\n",
    "                data['label'] = label_id\n",
    "\n",
    "    def tokenize(self, attr='book'):\n",
    "        # 使用以下两行命令安装spacy用于中文分词\n",
    "        # pip install -U spacy\n",
    "        # python -m spacy download zh_core_web_sm\n",
    "        # 去除文本中的符号和停用词\n",
    "        for data_split in [self.train_data, self.test_data]:\n",
    "            for data in tqdm(data_split):\n",
    "                # 转为小写\n",
    "                text = data[attr].lower()\n",
    "                # 符号替换为空\n",
    "                tokens = [t.text for t in nlp(text) \\\n",
    "                    if t.text not in STOP_WORDS]\n",
    "                # 这一步比较耗时，因此把tokenize的结果储存起来\n",
    "                data['tokens'] = tokens\n",
    "\n",
    "    # 根据分词结果建立词表，忽略部分低频词，\n",
    "    # 可以设置词最短长度和词表最大大小\n",
    "    def build_vocab(self, min_freq=3, min_len=2, max_size=None):\n",
    "        frequency = defaultdict(int)\n",
    "        for data in self.train_data:\n",
    "            tokens = data['tokens']\n",
    "            for token in tokens:\n",
    "                frequency[token] += 1 \n",
    "\n",
    "        print(f'unique tokens = {len(frequency)}, '+\\\n",
    "              f'total counts = {sum(frequency.values())}, '+\\\n",
    "              f'max freq = {max(frequency.values())}, '+\\\n",
    "              f'min freq = {min(frequency.values())}')    \n",
    "\n",
    "        self.token2id = {}\n",
    "        self.id2token = {}\n",
    "        total_count = 0\n",
    "        for token, freq in sorted(frequency.items(),\\\n",
    "            key=lambda x: -x[1]):\n",
    "            if max_size and len(self.token2id) >= max_size:\n",
    "                break\n",
    "            if freq > min_freq:\n",
    "                if (min_len is None) or (min_len and \\\n",
    "                    len(token) >= min_len):\n",
    "                    self.token2id[token] = len(self.token2id)\n",
    "                    self.id2token[len(self.id2token)] = token\n",
    "                    total_count += freq\n",
    "            else:\n",
    "                break\n",
    "        print(f'min_freq = {min_freq}, min_len = {min_len}, '+\\\n",
    "              f'max_size = {max_size}, '\n",
    "              f'remaining tokens = {len(self.token2id)}, '\n",
    "              f'in-vocab rate = {total_count / sum(frequency.values())}')\n",
    "\n",
    "    # 将分词后的结果转化为数字索引\n",
    "    def convert_tokens_to_ids(self):\n",
    "        for data_split in [self.train_data, self.test_data]:\n",
    "            for data in data_split:\n",
    "                data['token_ids'] = []\n",
    "                for token in data['tokens']:\n",
    "                    if token in self.token2id:\n",
    "                        data['token_ids'].append(self.token2id[token])\n",
    "\n",
    "        \n",
    "dataset = BooksDataset()\n",
    "dataset.tokenize()\n",
    "print(dataset.train_data[0]['tokens'])\n",
    "print(dataset.label2id)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "26d79e05",
   "metadata": {},
   "source": [
    "完成分词后，对出现次数超过3次的词元建立词表，并将分词后的文档转化为词元id的序列。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "b3b4c04f-99e6-4b04-91c9-7603ea5f7100",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['d:\\\\Python310', 'd:\\\\Python310\\\\lib\\\\site-packages']\n"
     ]
    }
   ],
   "source": [
    "import site\n",
    "print(site.getsitepackages())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "0d6b1918",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "unique tokens = 3185, total counts = 13539, max freq = 402, min freq = 1\n",
      "min_freq = 3, min_len = 2, max_size = None, remaining tokens = 586, in-vocab rate = 0.6830637417829972\n",
      "[340, 471, 186, 15, 23]\n"
     ]
    }
   ],
   "source": [
    "dataset.build_vocab(min_freq=3)\n",
    "dataset.convert_tokens_to_ids()\n",
    "print(dataset.train_data[0]['token_ids'])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d096d95f",
   "metadata": {},
   "source": [
    "接下来将数据和标签准备成便于训练的矩阵格式。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "ba632265",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "train_X, train_Y = [], []\n",
    "test_X, test_Y = [], []\n",
    "\n",
    "for data in dataset.train_data:\n",
    "    x = np.zeros(len(dataset.token2id), dtype=np.int32)\n",
    "    for token_id in data['token_ids']:\n",
    "        x[token_id] += 1\n",
    "    train_X.append(x)\n",
    "    train_Y.append(data['label'])\n",
    "for data in dataset.test_data:\n",
    "    x = np.zeros(len(dataset.token2id), dtype=np.int32)\n",
    "    for token_id in data['token_ids']:\n",
    "        x[token_id] += 1\n",
    "    test_X.append(x)\n",
    "    test_Y.append(data['label'])\n",
    "train_X, train_Y = np.array(train_X), np.array(train_Y)\n",
    "test_X, test_Y = np.array(test_X), np.array(test_Y)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3938acdb",
   "metadata": {},
   "source": [
    "下面代码展示朴素贝叶斯的训练和预测。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "f13251b7",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "P(计算机类) = 0.4571163653222068\n",
      "P(艺术传媒类) = 0.2494204914232731\n",
      "P(经管类) = 0.2934631432545202\n",
      "P(教程|计算机类) = 0.6049382716049383\n",
      "P(设计|计算机类) = 0.6\n",
      "P(基础|计算机类) = 0.6176470588235294\n",
      "test example-0, prediction = 0, label = 0\n",
      "test example-1, prediction = 1, label = 1\n",
      "test example-2, prediction = 0, label = 0\n",
      "test example-3, prediction = 0, label = 0\n",
      "test example-4, prediction = 0, label = 0\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "class NaiveBayes:\n",
    "    def __init__(self, num_classes, vocab_size):\n",
    "        self.num_classes = num_classes\n",
    "        self.vocab_size = vocab_size\n",
    "        self.prior = np.zeros(num_classes, dtype=np.float64)\n",
    "        self.likelihood = np.zeros((num_classes, vocab_size),\\\n",
    "            dtype=np.float64)\n",
    "        \n",
    "    def fit(self, X, Y):\n",
    "        # NaiveBayes的训练主要涉及先验概率和似然的估计，\n",
    "        # 这两者都可以通过计数简单获得\n",
    "        for x, y in zip(X, Y):\n",
    "            self.prior[y] += 1\n",
    "            for token_id in x:\n",
    "                self.likelihood[y, token_id] += 1\n",
    "                \n",
    "        self.prior /= self.prior.sum()\n",
    "        # laplace平滑\n",
    "        self.likelihood += 1\n",
    "        self.likelihood /= self.likelihood.sum(axis=0)\n",
    "        # 为了避免精度溢出，使用对数概率\n",
    "        self.prior = np.log(self.prior)\n",
    "        self.likelihood = np.log(self.likelihood)\n",
    "    \n",
    "    def predict(self, X):\n",
    "        # 算出各个类别的先验概率与似然的乘积，找出最大的作为分类结果\n",
    "        preds = []\n",
    "        for x in X:\n",
    "            p = np.zeros(self.num_classes, dtype=np.float64)\n",
    "            for i in range(self.num_classes):\n",
    "                p[i] += self.prior[i]\n",
    "                for token in x:\n",
    "                    p[i] += self.likelihood[i, token]\n",
    "            preds.append(np.argmax(p))\n",
    "        return preds\n",
    "\n",
    "nb = NaiveBayes(len(dataset.label2id), len(dataset.token2id))\n",
    "train_X, train_Y = [], []\n",
    "for data in dataset.train_data:\n",
    "    train_X.append(data['token_ids'])\n",
    "    train_Y.append(data['label'])\n",
    "nb.fit(train_X, train_Y)\n",
    "\n",
    "for i in range(3):\n",
    "    print(f'P({dataset.id2label[i]}) = {np.exp(nb.prior[i])}')\n",
    "for i in range(3):\n",
    "    print(f'P({dataset.id2token[i]}|{dataset.id2label[0]}) = '+\\\n",
    "          f'{np.exp(nb.likelihood[0, i])}')\n",
    "\n",
    "test_X, test_Y = [], []\n",
    "for data in dataset.test_data:\n",
    "    test_X.append(data['token_ids'])\n",
    "    test_Y.append(data['label'])\n",
    "    \n",
    "NB_preds = nb.predict(test_X)\n",
    "    \n",
    "for i, (p, y) in enumerate(zip(NB_preds, test_Y)):\n",
    "    if i >= 5:\n",
    "        break\n",
    "    print(f'test example-{i}, prediction = {p}, label = {y}')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a1cf6399",
   "metadata": {},
   "source": [
    "下面使用第3章介绍的TF-IDF方法得到文档的特征向量，并使用PyTorch实现逻辑斯谛回归模型的训练和预测。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "07765960",
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "sys.path.append(r\"D:\\Desktop\\社会舆情分析\\@Hands-on-NLP-main\")\n",
    "\n",
    "import my_utils"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "21a3bc79",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import sys\n",
    "sys.path.append('../code')\n",
    "from my_utils import TFIDF  # utils 和系统中的 utils 库重名了，如上代码显示，因此可以将自己编写的 utils 重命名为 my_utils\n",
    "        \n",
    "tfidf = TFIDF(len(dataset.token2id))\n",
    "tfidf.fit(train_X)\n",
    "train_F = tfidf.transform(train_X)\n",
    "test_F = tfidf.transform(test_X)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "dc8af30b",
   "metadata": {},
   "source": [
    "逻辑斯谛回归可以看作一个一层的神经网络模型，使用PyTorch实现可以方便地利用自动求导功能。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "1ddebf0c",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "epoch-49, loss=0.5635: 100%|█| 50/50 [00:01<00:00, 30.94it/s\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjcAAAGwCAYAAABVdURTAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjEsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvc2/+5QAAAAlwSFlzAAAPYQAAD2EBqD+naQAAbgRJREFUeJzt3Xd4VFX6B/Dv9EkPkAYhECBIb9IMoICgIIhY1oIoiIorgqjYwAJ2+K2KWFDWgriuq1gQC0qLAoJAaKH3lhBIAoT0ZJKZub8/wtzcO3OnJZNMMvl+nifPZu6cuXPCrOTlPe85r0oQBAFEREREAULt7wkQERER+RKDGyIiIgooDG6IiIgooDC4ISIiooDC4IaIiIgCCoMbIiIiCigMboiIiCigaP09gbpmtVpx9uxZhIWFQaVS+Xs6RERE5AFBEFBYWIgWLVpArXadm2l0wc3Zs2eRkJDg72kQERFRNWRkZKBly5YuxzS64CYsLAxA5R9OeHi4n2dDREREnigoKEBCQoL4e9yVRhfc2JaiwsPDGdwQERE1MJ6UlLCgmIiIiAIKgxsiIiIKKAxuiIiIKKAwuCEiIqKAwuCGiIiIAgqDGyIiIgooDG6IiIgooDC4ISIiooDC4IaIiIgCCoMbIiIiCigMboiIiCigMLghIiKigMLgxscqLFZYrIK/p0FERNRo+TW42bBhA8aMGYMWLVpApVJh+fLlLsefO3cOd999N6644gqo1Wo8/vjjdTJPT2QXlOHZ7/eg/fO/4/p31sNktvh7SkRERI2SX4Ob4uJi9OjRAwsXLvRovMlkQnR0NF544QX06NGjlmfnncIyM5ZuzwAAHD9fjG0nL/l5RkRERI2T1p9vfsMNN+CGG27weHxiYiLeffddAMDixYtra1rVkhQTius6x2LNgWwAwPojORjUPsrPsyIiImp8Ar7mxmQyoaCgQPZVW965syf6tG4CANiXWXvvQ0RERM4FfHAzd+5cREREiF8JCQm19l6hBi3mjOkCANh84iI+33QSgsDiYiIioroU8MHNrFmzkJ+fL35lZGTU6vu1jw2FVq0CALz8ywF8s61234+IiIjkAj64MRgMCA8Pl33VJqNOg5fHdhEfz1q2F8dyimr1PYmIiKhKwAc3/jC+f2tMvzZJfLzucI4fZ0NERNS4+DW4KSoqQlpaGtLS0gAAJ0+eRFpaGtLT0wFULilNmDBB9hrb+KKiIpw/fx5paWk4cOBAXU/drfsHtRG/f23FQazan+XH2RARETUeKsGPFa/r1q3D0KFDHa5PnDgRS5YswX333YdTp05h3bp14nMqlcphfOvWrXHq1CmP3rOgoAARERHIz8+v9SWqN347iI83nAAAaNUqHHp1JLQaJsuIiIi85c3vb78GN/5Ql8HNpmMXMP7TreLjcKMWm2ZeizCjrlbfl4iIKNB48/ubaYRaNDApCksm9cXVlw/zKygz44mlaf6dFBERUYBjcFPLhnSIwScT+uCt23tAq1Zh7cEcHDjLA/6IiIhqC4ObOmDUafCP3i0xokscAODHXWf8PCMiIqLAxeCmDo3u3hwA8MlfJ/Hs93twOKvQzzMiIiIKPAxu6tA1V0QjzFjZq3Tp9gy88ut+P8+IiIgo8DC4qUOhBi0W3dMboYbKAGfTsYs4l1/q51kREREFFgY3dWxgUhT2vTwCV7aKBAAkz/0Dn/51gg02iYiIfITBjZ8M6xQrfv/aioNoM+s3FJRV+HFGREREgYHBjZ9MHJDocO3DP4/X/USIiIgCDIMbPwk1aDH7xs6ya9kFZX6aDRERUeBgcONHEwckon1MqPh41f4sPPSf7TibxyJjIiKi6mJw40catQorpl+NXpeLi0vKLVh9IBvz1xzx78SIiIgaMAY3fqbXqrFsygB0jAsTr526UOzHGRERETVsDG7qAZVKhffG9UL/Nk0BANtPX8Jbqw77eVZEREQNE4ObeuKK2DB8dl9f8fEHfx5DVj4LjImIiLzF4KYesZ1cbHM0pxBFJjMe/XoXVu/P8tOsiIiIGhYGN/XYkewifLTuGH7ZfRYPfbnD39MhIiJqELTuh1Bdah8TiqM5RQCAV3894OfZEBERNTzM3NQzH0/og+s7xyo+Z7ZY63g2REREDQ+Dm3qmTVQIPp7QBysfvxrxkUGy5y4Wl/tpVkRERA0Hg5t6qmNcOP43ub/sWv83UvDO5QP+TGaLP6ZFRERU7zG4qcdaNwtBRJBOdu3dlKP4cstpdJm9Cj/vPuunmREREdVfDG7qubUzBiNJ0n8KAF5cvg9mq4Bvt2X4aVZERET1F4Obei46zIAhV0QrPhdi0NTxbIiIiOo/BjcNgEatUry+an82SsrNdTwbIiKi+o3BTQNwc694p891nr0Kv+5h7Q0REZENg5sGoFPzcPzx5GBc5+T8m2n/24UT54vqeFZERET1E4ObBqJtdCgeuqat0+ezC0x1OBsiIqL6i8FNA9I3sSmWPnQVfn10kHgtNtwAAMgvrcDsn/ZxiYqIiBo9lSAIgr8nUZcKCgoQERGB/Px8hIeH+3s61fbX0fMoN1uxeNNJbDp2ET0TIpGWkQcAODVvtH8nR0RE5GPe/P5m5qaBurp9NIZ1ikWIvrL3qS2wAYCyCp5eTEREjReDmwYu1OjY2D09twT7z+bjjn9vxrZTuX6YFRERkf8wuGngbJkbqf9uOY3R721E6slc3L5osx9mRURE5D8Mbho4i0LJ1H82n/bDTIiIiOoHBjcNXFm5+/qaUg/GEBERBQoGNw1cmbkqcHlpTGfFMZl5JQAAq1WA1dqoNscREVEjxOCmgWseESR+f1vvlopjsvJNyCkoQ49XVuPZH/bU1dSIiIj8wrEalRqU6de2x6XictzcKx6hBuWP81x+KbIKylBYZsZ3O87g6ZEdEBNmrOOZEhER1Q0GNw1cRLAO8+/s6XLMwXOF0EhydJuPX8TYns6bcRIRETVkDG4agcWbTsoe55VU+GkmREREtY81N43QnJ/3I3HmCryXctTfUyEiIvI5BjcBZkiHaI/Hzl9zBCbJbqv80grupiIiogaPy1IB5t27emHriYto2SQYFRYrxi7c5HJ8QakZ0WEaHM4qxIgFG3BD1zh8dE/vOpotERGR7zG4CTARQTpc3yVOfDwwqRk2HbvodHx+aQWiwwxYvLGyLuf3fVm1PkciIqLaxGWpAPfl/f2x56Xrodcof9QFZZXFxRUWa11Oi4iIqNb4NbjZsGEDxowZgxYtWkClUmH58uVuX7Nu3TpceeWVMBgMSEpKwpIlS2p9ng2ZWq1CuFGHP58egvH9Wzk8n19aGdyUM7ghIqIA4dfgpri4GD169MDChQs9Gn/y5EmMHj0aQ4cORVpaGh5//HE8+OCDWLVqVS3PtOGLjwxSPMG4oNQxcyMoNOMkIiJqKPxac3PDDTfghhtu8Hj8okWL0KZNG7z99tsAgE6dOmHjxo145513MGLECMXXmEwmmEwm8XFBQUHNJt2AdYuPwPWdY7H6QLZ47dvtGRjbMx4VlqqApvdrazGsYwz2ZuZj7q3d0KtVE39Ml4iIqFoaVM3N5s2bMXz4cNm1ESNGYPPmzU5fM3fuXERERIhfCQkJtT3NekunUePjCX1k1zYdu4j0iyWyzE1ucTm+23EGh7IK8cAX2+t6mkRERDXSoIKbrKwsxMbGyq7FxsaioKAApaWliq+ZNWsW8vPzxa+MjIy6mGq99u5dPWWPr3nzT5y8UKw4Nre4vA5mRERE5DsBvxXcYDDAYDD4exr1ytie8RiYFIU+r60Vr525pBwcEhERNTQNKnMTFxeH7Oxs2bXs7GyEh4cjKCjIT7NqmKJCDeibyFoaIiIKPA0quElOTkZKSors2po1a5CcnOynGTVs827r7u8pEBER+Zxfg5uioiKkpaUhLS0NQOVW77S0NKSnpwOorJeZMGGCOP7hhx/GiRMn8Mwzz+DQoUP48MMP8e233+KJJ57wx/QbvHbRoVj5+NVux5WUm/HRuuM4fr6oDmZFRERUM34NbrZv345evXqhV69eAIAZM2agV69emD17NgDg3LlzYqADAG3atMGKFSuwZs0a9OjRA2+//TY+/fRTp9vAyb2oUPf1SI9/k4b/W3kI//jo7zqYERERUc2ohEZ2YltBQQEiIiKQn5+P8PBwf0/H7yxWAe2e+83hul6rRrnZ8dTiU/NG18W0iIiIZLz5/d2gam7I9zRqleL1zTOvRajBcTNdRm5JbU+JiIioRhjcED6f1BfPjuyIdtEh4rVQoxYWq2NSb9jb61F4udkmERFRfcTghjC0QwymDGmHYH1VpkavUSsGN+UWK7q9tBr7z+bjYpEJi9YfFzuLExER1QcBf4gfea5ZqF78XqVSocLqvFP4h38eR5Beg+93nMGPOzOx6olr6mKKREREbjFzQ6JmIfKdU65KzcOMWmw+fhEAcDi7UDHLQ0RE5A8MbkgUFaZ3P+iy8CAdusVHiI8z2b6BiIjqCQY3JLq9d2XH9J4JkQCAf17T1ulYvUaNMrNFfHw6V7nxJhERUV1jcEOipJhQbJ51Lb556CoAwFMjOjgdm11QhnWHz4uPT1+s3CJeVmHhScZERORXDG5IpnlEEIw6DQBAp1Hj1l7xiuO+23FG9nj7qVwcOFuA2z76G8PeXo99mfmKr1u9PwvPfr8HZRUWxeeJiIhqirulyKXZYzojoWkwcgpNOH2xGIPaR+FfKw87jFuedhbL086Kj/ecyUdXSU2OzUNf7gAAtIsJwUPXtKu9iRMRUaPF4IZcigzW44nrrhAfL92W7mJ0FfvMzOGsQpwvNImP3/jtEG7qEY+4CKNvJkpERHQZl6XIK0F6z+Lh/FL5wX4jFmzAPZ9tlV2b9/tBn82LiIjIhsENeUWn0ItKp3G8Jg1unPVmLTKx7oaIiHyPwQ15pbjcMSAJN+ocrhVIghuTQndxwHnQQ0REVBOsuSGvJMWEit9PvzYJIQYtvvj7lMO4PElwU2QyK96LoQ0REdUGBjfklZ4Jkfj3vb2R2CwEHeLCAAD/3XraYdwfh3Lw295zmPv7QTw6tL3ivZi5ISKi2sBlKfLaiC5xYmADAFMGJymOe+SrncjILcUzP+xRfJ7tqIiIqDYwuKEau6tvAn6eNtDr1zG2ISKi2sDghmpMrVahe8tILJ86EC+N6Yy/nhnq0euky1JHswux/VRubU2RiIgaEdbckM/0TIhEz4RIhzNunJGW3Fz3zgYAwJZZw3iwHxER1QgzN+RzoQbPYmbh8sKU2VK1VTwzr7RW5kRERI0HMzfkcxqFg/6UWK3Ap3+dwIajF8Rreo0a5WYrcgrL0LJJcG1NkYiIAhgzN+Q3WQVleG3FQWw4cl68ZhEETFycikH/9yd2nM6FIAg4c6nEj7MkIqKGhsEN+U2pwmnH5WYrNp+4CAD4cvNpzFt5CIP+708s2XSyrqdHREQNFJelyG+yCsocrpnMVQHP8rSz4vevrjiI+wa2qZN5ERFRw8bMDdWqMGNV/BwR5NiDyp6pQrkPlYdlPERERAxuqHZc2SoSADAhuTUGXxGNzs3D0b1lhNvXOWuyqVYxuiEiIs9wWYpqxWcT++Lv4xcxvHMM9JrKGHryf3a4fd3U/+1UvO7pDiwiIiIGN1QrmoToMbp7c9k1g676iUJmboiIyFNclqI6M3Nkx2q/1pPY5qe0THy84Xi134OIiAIDgxuqMwlNg/HQNW0VnxveKdblaz1ZlnrsmzS88dshHMkurNb8iIgoMDC4Ib9rHmGEVXDdIzyvpAJWq/Mx0iacucXlPpsbERE1PAxuyO8sVgHlTnZJSf3fqkNOn6uwVAU3buIkIiIKcAxuqE61aurYL0opuGkTFYKoUIPs2r/Xn3B633JJ801bQ04iImqcGNxQnbqrbwIeHtwO/32gv3jNIgiy4AQAvnygHwxax/97/nEoG5eKy1FWUXmS8f6z+cjMK5UHR4xtiIgaNW4Fpzql1agx8wb5rqnWTYMdMjctmwRDp3EsIr5/yXYAQLBeg8X39cVdH28BAGyeda04xj5QIiKixoWZG/KbH6YkY3inGLx7Vy9ZQBIXbgTgOgFTUm7BzB/2iI+lbRtMZiu+2noat364CZdYXExE1OgwuCG/6d26KT6d2BeJUSEY1ilGvP7ztIEA3BcGSzdP5ZZUBTHrDp/H8z/uw870PLybchRlFRb8ffwCKpjRISJqFFSC0Lj2lhQUFCAiIgL5+fkIDw/393TosrIKC35OO4shHaIRczlz0/f1tThfaPLo9fGRQcjMK3W4PqZHC+jUKizblYkpQ9rh2RocJEhERP7jze9vZm6oXjDqNLijb4IY2ADwOLABoBjYAICpwoJluzIBAB+t4+nFRESNAYMbCmjOuowTEVHgYnBD9Zazlgxv3NINN9o15XTGZLb4ckpERNQAMLihemv+nT0w99ZuDtdDDBqEGjw7xaCswjFzs/9sPn7efdbpa6xWQTxHh4iIGh6ec0P1VrhRJ9tFZROi1yLEw+AmLSPP4dro9zYCAGLDDOjftpnsuYKyCtz9yRaczSvD+qeHIMyo837iRETkV/Uic7Nw4UIkJibCaDSif//+SE1NdTq2oqICr7zyCtq1awej0YgePXpg5cqVdThbqksxYUbcemU8+rdpKl4L9iJzY0/afPOwQvfwX3afxb7MAuQWlyM9t6Ra70FERP7l9+Bm6dKlmDFjBubMmYOdO3eiR48eGDFiBHJychTHv/DCC/j3v/+N999/HwcOHMDDDz+MW265Bbt27arjmVNdmX9HTyy+r2/VBQGy4KZ1M8d+Vc7klVaI3zuefwzkFlWdl2O2NKpTEoiIAobfg5v58+dj8uTJmDRpEjp37oxFixYhODgYixcvVhz/5Zdf4rnnnsOoUaPQtm1bTJkyBaNGjcLbb79dxzOnuiRdhmoWapA97hYf4fF9LhZJtperKsOb/NIKrNx3DsUmM0oltTY89I+IqGHya81NeXk5duzYgVmzZonX1Go1hg8fjs2bNyu+xmQywWg0yq4FBQVh48aNTsebTFW/0AoKCnwwc/KH/03uj7N5ZegQF4YD5/LF631aN8Gve855dI9Nxy6I39syN1P+uwN/H78IoPIwQJsKZm6IiBokv2ZuLly4AIvFgthY+Zbf2NhYZGVlKb5mxIgRmD9/Po4ePQqr1Yo1a9Zg2bJlOHdO+Zfb3LlzERERIX4lJCT4/OegujGgXRT+0bslAKDCXBV4XNm6ifh9QtMgaNVKC06VXvrlgPj95cSNGNgA8sMAvcncHDhbgFnL9iKnoMzj1xARUe3w+7KUt9599120b98eHTt2hF6vx7Rp0zBp0iSo1co/yqxZs5Cfny9+ZWRk1PGMqTaYJYXB7aJDxe+v7xyHyGC9R/dQKVbdSN+jMrg5llOE+asPI19Sr2Pvxvf/wtep6Xji2zSP3puIiGqPX4ObqKgoaDQaZGdny65nZ2cjLi5O8TXR0dFYvnw5iouLcfr0aRw6dAihoaFo27at4niDwYDw8HDZFzV8o7s3R2y4Abf3bokQgxaL7rkSU4a0w+Sr20KvcR202KjcDCu/nB26/p31eO+PY3hjxUGnY22x1u6MfKdjiIiobvg1uNHr9ejduzdSUlLEa1arFSkpKUhOTnb5WqPRiPj4eJjNZvzwww8YO3ZsbU+X6pGIIB02zxyGN2/vAQAY2bU5nh3ZEXERRui1nv3feum2DLjqG2vL3IiBy5k8t/csZ7sHIiK/8/shfjNmzMDEiRPRp08f9OvXDwsWLEBxcTEmTZoEAJgwYQLi4+Mxd+5cAMDWrVuRmZmJnj17IjMzEy+99BKsViueeeYZf/4Y5AdqJ7U1ngY3aRl5Lk8qtq+58eS+5dxhRUTkd34Pbu68806cP38es2fPRlZWFnr27ImVK1eKRcbp6emyepqysjK88MILOHHiBEJDQzFq1Ch8+eWXiIyM9NNPQPWNp8ENAGw5kQuVClBK4NjvljJI7rvwz2P4+/gFLL6vLwxaTbXnSkREvuf34AYApk2bhmnTpik+t27dOtnjwYMH48CBA4pjiQBAp/E8uCk2maECoLQ49fmmU7j98u4sANh26hKyC8oQG27Em6sOAwB+2nUWd/TlDjwiovqkwe2WInJH70Vw8/Pus7A6Kbs5eK4Aaw/KT8qe/rX8JOxnftiD91KOejU/k9mCPw5lo8hk9up1RETkGQY3FHC8WZZy52iOvP/ULoVGnPPXHPHqnm+tOoz7l2zH1K921mRqRETkBIMbCjgGheAmoWmQwkj3LHZ1Ny7OB/TYfzafBgCsP3K+5jcjIiIH9aLmhsiXpJmbv54ZipX7stA+NhT3fb7N63tJe00B7g/+84TF2ToYERH5BDM3FHCkBcUJTYMx+Zq2iAo1VOteJeV2wY0KLs/G8YSlhq8nIiLXGNxQwLm6fbTDNaNOebv20yM6uLzX4Sx5zY1apXKbeTkr6U8FAOfyS2EyVwVJjG2IiGoXl6Uo4NzaKx4GrRo9EyLFa8F65eAmyEnQY7P5xEXZYxXcdwv/cVcmpg5NAgAcyirAyAV/oUNsGFY9cY37yRMRUY0xc0MBR61WYUyPFkhoGixecxbE6LzcWaVSARVW16cQf7MtHYVllU02f0qrPAH5cHahq5cQEZEPMbihRiHISeZG5+X2J5VKhQo3/aMyckvx0H92AGDxMBGRPzC4oUZBaXs44N1pxsDlzI2bZSmgajnL7MFYIiLyLdbcUKOgUqmw+olrUFpuwccbTmDF3nMAAK3Gu8xNXkkFbvpgo0dj80sqYGX1MBFRnWNwQ43GFbFhAORLRd5mbgAgp9Dk0biCsgqY3dTnEBGR73FZihqdp0d2QJhBi+nD2mNguygA8hOMm4boffI+ZqvAZSkiIj9g5oYanXbRodg1+zpoL2dt9r88AjqNGqPf+wsl5Ra0jQ7BX0cviONDDdpqNbmssFhhlmSJys1Wh75X207lYuuJi5gyJAkaX/R2ICIiBjfUOGkly1Ehhsr/DFZMvxoqFZB5qRRD3loneV5TreCmpNyCrPwy8XHn2Svx3wf7y8bcvmgzACA23Ijb+yR4/R5EROSIy1JEl+m1aug0aiRGheC1m7uK10P0zv8N0DY6xOlzNy/chI3HqjJAZquAuz7eojj25IVixesFZRU4lsMzcoiIvMHMDZEC6fJRsMH5KcYGresTjj2lVikvSd343kak55YgPjII4UE6fHF/X8SEGX3ynkREgYqZGyIF0nNxmgQ7LzB2dn6Ot5yV26TnlgAAMvNKcfBcATZKaoGIiEgZgxsiBdJGm1e3j0Ln5uGK43wW3HhYTFxh4dZyIiJ3GNwQKWgXHSp+H6TX4uMJvRXHOWvr4C0VPAtuyrm1nIjILQY3RAraRlUVCucUlEHv5LC/6hwCqEQpcWNV6Evlrq8VERExuCFSpFarkBRTmb0Z0iFGtnW8meSQP52X7RtcvZ+9MrPF4RqXpYiI3ONuKSInlj0yAGdyS9G5RWW9zfRh7RGk0+D7HRm4WFwOwHeZG6XNUqXlvglu3l17FH8ezsH/JvdHsItt7UREgYJ/0xE5EW7UoXMLnfh4xnVXAAByi004fv4kmobofRfcKNTcvJdy1OGaJx3J7b2z9ggA4NttGbhvYBvvJ0dE1MBwWYrIS09e3wGvjO2Cn6cNrNWamy82n3a4ti8zH89+vwdn80q9fo8y1usQUSPB4IbIS0adBhOSE9GySbCs5uaHKcluXzuuXwIeGNQGYUZ50tTZIX72Ug7lYOn2DDz69S7vJg1A4EYrImokGNwQ1YA0cxNq0LkYWWnurd3x4o2dobVL1QiojDzMFiv2nMlDWYVjvY3UwXMFDtdKyy0YuWADZi3bo/gaK6MbImokGNwQ1YBWkrnRerFzyr4DuK1O+OVfDuCmDzbh9RUHXb7e/p1OnC/CxMWpOJRViK9TMxRfIzC4IaJGggXFRDUgPf9Gp/b83wr2y1C2rMqXW07L/tcZld3rr317vdv3VDg2h4goIDG4IaoB6bKUUuamU/NwxSUk+2Ups5e7oGyxzeKNJ7Hj9CWn46TZGiZuiKixYHBDVAPugpu2USFicCPtNG5/aN+lknKv3rek3IIlm07ilV8PKD5vtlih1ahhlqRrWHNDRI0Fa26IauDq9lEAKrdyKy1LtYg0it8bJIGQfc3Nkr9PYV9mvsfva7EKeOkX5cAGAEyXt31LM0IMbYiosWDmhqgGusZH4Jdpg9BcEsRINY8IEr+XZm40Clu//73hhMO1IJ0GpW52Tikxma0IMQDlkhONWVBMRI0FMzdENdStZQSiQg0ID9LBqJP/JyXN3EiDm2ahenjC/jwcT5WLmZuq4Kamy1JKjTyJiOojBjdEPqJRq5A2+3o8MqSdeE2auZHW5/zfbd0dXq+UWQkPcn92jhLT5aab0nYN1WndYDNr2R4kz0tBfklFte9BRFRXGNwQ+ZBRp8GJ88Xi4w5xYeL30sxN2+hQDL4iWvbaX/ecc7hfdTM3tpobaaNNk93ylrNlKrNCc86vUzOQXWDCdzuUz9AhIqpPGNwQ+Vhyu2YAKndKGXUa8bq+Gn2oWjUNrtYcyhWCG2n9zar9Wejx8mqkHMyWve7DdcfQ/eXVOHDWcfs64Hi+DhFRfcTghsjH7uqXgPfH9cIPUwbIruu08v/cPIkTkts2q9YcbMtS0q3gJknjzH9+uQMFZWY88MV2LFh7BH8fvwAA+NfKwygpt+Cln/cr3pehDRE1BNUKbr744gusWLFCfPzMM88gMjISAwYMwOnTrk9WJQp0Bq0GY3q0QJMQedGwwS5z8+R1Hdzeq3frJtWag6nCipk/7ME9n24Vry3flYljOUUOYxesPYq7P9mKM5dK3N5XqXs5EVF9U63g5o033kBQUGWh5ObNm7Fw4UL861//QlRUFJ544gmfTpAoUIQHyetnurWMwFPXX+HyNTHhylvM3SkymfHNtgzkFJrEa1YBLruJXyhSPkhQWpvDZSkiagiqVa2YkZGBpKQkAMDy5ctx22234aGHHsLAgQMxZMgQX86PqMF77eau+GzjScy+sYvDcwatRuEVVezbNHgqq6BM8frBcwVIv6icoalQKCSuvF4V3DBzQ0QNQbUyN6Ghobh48SIAYPXq1bjuuusAAEajEaWlpb6bHVEAuOeq1vjzqSFo1cyxONjs5uwY6UnGTUM8OxsHAM7lKwc3AHDNm38qXq+Q1OQIkvOMpYXIJy4Uo9hkdnrvgrIKDHt7Hf5v5SGP50pE5GvVCm6uu+46PPjgg3jwwQdx5MgRjBo1CgCwf/9+JCYm+nJ+RAHNYlXOlthIMzdNgj0/8+Zcnvf/yJAGMUUmC9YeyEZZhUXceQUAn286heHzlTuQf7s9A91fWo3j54vx0brjXr8/EZGvVCu4WbhwIZKTk3H+/Hn88MMPaNasckfHjh07MG7cOJ9OkCiQOVkJAgB0aRHu0IPKU2ddZG6ckS4/HTxXgAf/sx3vrD0i7ryycZYVeub7PV6/JxFRbahWzU1kZCQ++OADh+svv/xyjSdE1Jg4y9z885q2mDggUVbA26ppMI5LDgh0JfVkruxxRJAO+aVVpwuHGbQotFtekmZobJZuy8Dd/Vp59J5ERPVFtTI3K1euxMaNG8XHCxcuRM+ePXH33Xfj0qVLXt9v4cKFSExMhNFoRP/+/ZGamupy/IIFC9ChQwcEBQUhISEBTzzxBMrKvP+XKpG/VTipuXlmZEe0iAySXWvdLATPj+pUrfcJ0skLl9vFhDrORSGNlFdSgZX7sqr1nkRE/lKt4Obpp59GQUHlCaZ79+7Fk08+iVGjRuHkyZOYMWOGV/daunQpZsyYgTlz5mDnzp3o0aMHRowYgZycHMXx//vf/zBz5kzMmTMHBw8exGeffYalS5fiueeeq86PQuRXFifBjbPlqBFd4qr1PsF6eXDTWqG4udzJGtnc31kcTEQNS7WWpU6ePInOnTsDAH744QfceOONeOONN7Bz506xuNhT8+fPx+TJkzFp0iQAwKJFi7BixQosXrwYM2fOdBj/999/Y+DAgbj77rsBAImJiRg3bhy2bt3qMBYATCYTTKaqsz5sQRlRfWC2a2b54o2d0TTEeeGwVlMV9Cx96CpsOHoeC/90X7xrtMvcxEU4np/jbCs4EVFDU63MjV6vR0lJ5VkZa9euxfXXXw8AaNq0qVfBQ3l5OXbs2IHhw4dXTUitxvDhw7F582bF1wwYMAA7duwQl65OnDiB3377zWlQNXfuXERERIhfCQkJHs+PqLZZJQfkbZ51LR4Y1Aa39Grp0Wvbx4bh6REdPRobYpAHN80VDgdUqrkhImqIqhXcDBo0CDNmzMCrr76K1NRUjB49GgBw5MgRtGzp2V/MAHDhwgVYLBbExsbKrsfGxiIrS3md/+6778Yrr7yCQYMGQafToV27dhgyZIjTZalZs2YhPz9f/MrIYFdjqj9sncGNOjWaRwS5GQ3EhBmQ2CwYSTGhTreGv3yT42GBwXp5krZ5pON7VTdzk5Vfhqlf7azWa4mIakO1gpsPPvgAWq0W33//PT766CPEx8cDAH7//XeMHDnSpxO0t27dOrzxxhv48MMPsXPnTixbtgwrVqzAq6++qjjeYDAgPDxc9kVUXwzpEI3/Te6Pv5651u1YQRCg1aix+onBWPX4NU5bIdi3eQCAUINdcKO4LOX6QEFnXv5lP1bsPVet1xIR1YZq1dy0atUKv/76q8P1d955x6v7REVFQaPRIDs7W3Y9OzsbcXHKhZMvvvgi7r33Xjz44IMAgG7duqG4uBgPPfQQnn/+eajVbHRODYdKpcKAdlFevUZv1138zX90x9OSM2bsszSV1+TLUmFGx6yPN8tSgiCIwdUpJ+0cpGOIiOpStYIbALBYLFi+fDkOHjwIAOjSpQtuuukmaDSue+VI6fV69O7dGykpKbj55psBAFarFSkpKZg2bZria0pKShwCGNt7Shv8ETUWt/dJwIWicrHlQYib4CZIp3EIkADnu6WUVFgE6LUqh3tLWQVAw9iGiPygWsHNsWPHMGrUKGRmZqJDhw4AKgt3ExISsGLFCrRr187je82YMQMTJ05Enz590K9fPyxYsADFxcXi7qkJEyYgPj4ec+fOBQCMGTMG8+fPR69evdC/f38cO3YML774IsaMGeNVYEXU0LgK3aUBRrDB8b8D6W4pnUYFvcYxuCmrsDhcc8ZstUIPNUxmi9PmnmarFRo1/5skorpXreBm+vTpaNeuHbZs2YKmTZsCAC5evIh77rkH06dPx4oVKzy+15133onz589j9uzZyMrKQs+ePbFy5UqxyDg9PV2WqXnhhRegUqnwwgsvIDMzE9HR0RgzZgxef/316vwoRAHhjj4J+HXPWQzpEKOYuZFmarQatWLmprTc8+Cmwizgvq9Sse7weadjnJ3hYy/1ZC6mf70LL93UBSO7Vu8cHyIiqWoFN+vXr5cFNgDQrFkzzJs3DwMHDvT6ftOmTXO6DLVu3TrZY61Wizlz5mDOnDlevw9RoArSa/DdwwMAABm5jjUwOkmmRqtWwaAQ3GRcUq6dUXI4u9BlYAO473huc89nW1FutuLh/+7AqXmjPZ4DEZEz1aq+NRgMKCwsdLheVFQEvV5f40kRkSNPS8qUAhdppkanUcOgVWNIh2jZmE3HLno8lzv+rXwOlZT9AYXO8HwdIvK1agU3N954Ix566CFs3boVgiBAEARs2bIFDz/8MG666SZfz5GIID+d2JUmIY7/wJDW2Gg1KqhUKiyZ1M/pWTm+YHbSFJSIqLZVK7h577330K5dOyQnJ8NoNMJoNGLAgAFISkrCggULfDxFosbt6REd0LpZMB4ZkuTReJ1Gjb0vXY+P7+0tXpPV3EgKgDfPGoanrr/Cd5OV8LTmhojI16pVcxMZGYmffvoJx44dE7eCd+rUCUlJnv3lS0Semzo0CVOHevffVphRh6gwg/hYHtxUfW/UaRAVakBNRYUacKHIJLvm6bKUp1bvz0LGpVI8MKiNT+9LRIHH4+DGXbfvP//8U/x+/vz51Z8REflEkGT7t0ErX5aSUto5BQDj+7fCoaxC7Dh9ye17KdX5vPLrAbz5j+6IDNZDEAQczi5Em6gQGLTV2x7+0Jc7AAD7z+bjkSFJSIoJdTmehwgSNV4eBze7du3yaBz/MiGqH6Rn28hrbuSBiLMgIUin8XjHk9LB4GsOZOOp7/bg04l9sDwtE08s3Y2hHaLx+aR+ivewP3vn+Pki/JR2Fg8MaoOIoKraoGU7M7FsZ6bLnVWXistx4/sbMapbHJ4f3dmjn4GIAofHwY00M0NE9Z80cyPbLWV36F7n5sr91gw6NcwenlqscfKPmrUHK1urLN54CgDwp4vt4wadPLgZ9vZ6AMCSTScx84ZOHs3D5ovNp5CZV4pP/jrJ4IaoEWIjJqIAZZQEC2pJQGO/LKXVqBEb7lh3Y7YKHncKV7vJ2Do5xFh2f+lylfS05IIyM577ca9H87CxspiZqFFjcEMUoKTLUtJf9jqF1gtKRcXHc4o8LgpWO4teUFn74uz5ReuOi99L63ZOnC/26H2JiJQwuCEKUNJgQZrIiI8MchjbzC64UauAiQMSPW6m6WxZCgAy80qxKz1P8bn9ZwvE78OMVavkJeVmj96XiEhJtbuCE1H9Ji3ut1gFfDaxD77dnoFnR3Z0GNs9PgIbjlTWw3zz0FXo0iIcYUadx5kbV6tSj3+TJnucmVcqBlinJa0ipOfilHjQ54q7oYjIGQY3RI1Au+gQtI8Nw7BOsYrPTx2ahLP5pRjZJQ5XtW1W9bqYEGQVlLm9v8bFstR2u63kA+f9gV8fHYQ/DuXg4LmqzI1tZ1ZpuQUTFqe6fc/SCguCFZqEAq47qBNR4GNwQxTAVj1+DbILytA+NszluCC9BvPv6Olw/V//6IF/rTyEm3q0wANfbHf6encFxfZeWL4PaRl5smu2HlPf7cjw6B7FJuXg5l8rD+FDSS0PETU+rLkhCmAd4sJwzRXR7gc6ER8ZhHfv6oU+iU3Fa/bLWrf3bul0N5Qz2QrZIFsvKulOKVeKTY51ORarwMCGiBjcEJF70gP2rHbtyV+9uavL3VJKlAKYisv1PfaH+TlTpBDcHMsp8ui1F4pMuPbtdfjgj6MejSeihoXBDRG5pZOcjSNIgpvIYB2MOo3L3VJKLpVUOFyrsFhhtQr4Me2sR/dQytxIa3hceWvVYZw4X4y3Vh/xaDwRNSwMbojILWnBsHR3uC3O8bbmRonZIuB/qenYbVeL40ypQvanoMwxaCo3W7Er/ZJsN9aJCzxHhyiQMbghIrekW67tl6UA5d5S3iqtsOCF5fs8Hn/f59vwyYYTsmvFJseA56nvduOWD//GuylVS1CZl0qrP1EiqvcY3BCRV6TBjW2JKibM6Je5vP7bQdljpcP/ft5ducz14Z/HxGuZeQxuiAIZgxsi8opVEDB9WHsAlcXEAPDC6E4YmNRMNu71W7p6dL87+rT02dyUioxtePYNUePB4IaIvGIVgBnXXYG02ddhbM94AEBMuBFfPXiVOOb+gW3QLT7C7b1u6RWPWV52/HalRGFZykZpOa02Hc0uxIo95+r0PYmoEg/xIyKv2JpwRgbrnY8RBDRx8bzNhOTWCDX67q+hYhc9qeo4tsF172wAAEQE9ceg9lF1++ZEjRwzN0TkFU8zINFh8macL97YGRFBOkQG68RrzUIM0Do5I6d5hHIdT4heI3ssPTPHk55UNXWhyISnvtuNHXZtJZzZfza/lmdERPYY3BCRVzxpFG4VBBh1Gozt2UK8dkVsKHa9eB2eur6DeK1JiA4qlUoxwIkNdwxuusVHYGjHGNm1ji+uxF9HK5t+Kp19487bqw/LHp/LL8Udizbjt73KS0ov/LgP3+84g9s++tuj+7O3J1HdY3BDRF5JjAp2O8aW3BnaoSoQMWg1UKtVskMAQw2VS1I6hVOJlQIejVqleILxE0t3A6he5ub9P47JHs/+aT9ST+Xika92Ko4/klPo9XsQUd1izQ0ReWTpQ1dh47ELGNevlduxtqWriKCqJSi9tjIoCZdcs52fo9WoALvz95Q6jQuCUDnWji1j46rmxsbsJvWUle+6C7rV6l3xjgpM3RDVNQY3ROSR/m2boX/bZu4HomrbtTSQsWVcRnVrjrUHc9C/TVUzTsXMjUIQYzJbFceWVlggCALyFdo62CtXCG6sVkHsj1XhJvixuKg5OnWhGAKANlEh4jUuSxHVPQY3RORztqUnafGwLXOj06jx/rhesvFK7RuUgpjCMrPidQBoM+s3j+ZmqnAMXorLzQgzVs5VKfiRsliUgxuT2YIhb60DABx6daRHcyGi2sGaGyLyOVtyQ7os5WxXlDNKu6UKyipkTTyVDEpyve3aZHYMXgrLqpazyiXPL954EgPn/YEDZ6sacpqdLEuVSup98jzIIBFR7WFwQ0Q+p1Rz4+0xM4IAbHh6KB4Z0k68VlhmhtZJ5sZm/h09XD5vMjsWHUtPNpYuS73y6wFk5pXixvf/wrGcIpy6UCxrwClla/Ngfw8V16WI6hyXpYjI52yZG51GjSlD2iG3qByJzZzvslL6/Z8UE4pWzYLxzMiO+HDdcfG6zk0GqGmI68MD3WVuzArLTlYBGD5/PQAgXOHQwTOXSjD7p/2S96j983aIyDkGN0Tkc30Tq4qFnx3Z0avX/jBlANYfzsGE5ETF511lQvQatcvMzpPf7sYPO884XL/to7+xYvogdG4eLluWUiINjlbuO4c3Vx12OI1ZuiWdeRuiusfghoh8Zv3TQ7Dj9CXcfLnnlKekAUDv1k3Qu3UT2fNqVWX2BIDsnBx7QXanF9tTCmxsRr+30e08AfmS08P/tZ2FUywb86+V8oMBiahuseaGiHymdbMQ3HplS3Fbta9MG5oEABjdrTlcHTMT7Ca48QVPjrnZeOyC+D1LbojqHoMbIvI7dwHA9GHt8c1DV+HtO3o49LYa3qnqFGR3mRt/cPWjHcspYn0OUS1gcENEfufuFF+tRo2r2jaDUaeR7bpaO+Ma3NEnQXxcF5kbX0k5mI3h89ejwwtVvbGIyDcY3BBRgyLN3CTFhMGoqwpognX1s4xwzYFs3PrhJpy+WFWb83Vquvj9vZ+l+mNaRAGLwQ0R+Z1XdSl2NS/S4Ma2LPXU9Vf4YFa+YRGAyf/Zjp3peXhiaZp4XauW//VbVmHxum8VESljcENEDYp9zY1RV/XXWNDlQGfate1r9B7S3lA1ZbFW7a5Kzy0Vv7fvndX95dUY98kWn70vUWPG4IaI/M6rxI1dcsOglSxLSWpu4iODqj0fvZtTkL0hbVVVLDkJ2b5HVrnZiq0nc332vkSNGYMbImpQ7LeZSzM3IYaqmpv/PNAPA9p51sXcnq3Jpy9IM02lFVU7o7zttUVEnmNwQ0QNyuSr26J5hBFTh1b2nJLW3EiXk9pFh2LyNW2r9R7umnN6Q6mdAwDofBhAKSmrsOCHHWdwschUq+9DVB/Vz60FRNSoeNNcMjrMgL9nXiu+xiAJEtrFhMrGaqp5gp4vMzcWJycqu+uRVRMnLxRj6FvrAABd48Px66NX19p7EdVHzNwQUYMjDYakB/ddEWsX3FQzgJDWw9T0hGH7HVBlFRZcKi5HTqFyRsVVewlPPfSf7eL3+zILPHrN8fNF+CY13WnXc6KGpF4ENwsXLkRiYiKMRiP69++P1FTnZz4MGTIEKpXK4Wv06NF1OGMi8qWaBBAGrQbzbu2GN27phuYR8iLi3q2boGWTIFzdPkp23V0tjjQbFBGkq/7kAJitAmLDDeLjZTsz0evVNfh9X5bieF8EF0dzirx+zbC312Pmsr34Zlu60zFbT1zEeSdBGVF94vfgZunSpZgxYwbmzJmDnTt3okePHhgxYgRycnIUxy9btgznzp0Tv/bt2weNRoPbb7+9jmdORPXFXf1a4e7+rRyuG3UarHtqCP5zfz/Z9U8n9nF5P2nmJtxYs+DGKgiyHV0Zl0pcjt984qKsOecPO87g/iXbUCTZaeWOtMjaW5uPX1S8vuHIedz58RYM+r8/qn1vorri9+Bm/vz5mDx5MiZNmoTOnTtj0aJFCA4OxuLFixXHN23aFHFxceLXmjVrEBwczOCGiBRpNWqHmp5gvetyQ2lwk2RXx+Mti1WQBSv7z7peJrr3s1R8t/0MzJdf8+R3u/HHoRx8suGEx+8pLbL2VrnZqnj9j0OV/+A0OXmeqD7xa3BTXl6OHTt2YPjw4eI1tVqN4cOHY/PmzR7d47PPPsNdd92FkBDlQ7dMJhMKCgpkX0RUv9S3ztnSguIrW0XW6F72wc2GI+77SD334150fWkVvtx8SryWV1Lu8XsatdUPbsqcBC/Sn4GovvNrcHPhwgVYLBbExsbKrsfGxiIrS3k9Wio1NRX79u3Dgw8+6HTM3LlzERERIX4lJCQ4HUtEgcubAEqauRnRJQ5NQ/TVfl+LVXCaDXGlrMKKF3/aX633rMmylKlCuUt5bQc3viikJrLx+7JUTXz22Wfo1q0b+vXr53TMrFmzkJ+fL35lZGTU4QyJyBPj+lXWy/RLbFpr7+FsW7jShirptZgwI/54cjAig6tXe2MRBFQ4OevGG97coUbLUnZBzKXicsz8YU+tnp6890w++r2Rgu93nKm196DGxa/n3ERFRUGj0SA7O1t2PTs7G3FxcS5fW1xcjG+++QavvPKKy3EGgwEGg8HlGCLyr39e0w49W0aiR0Jkrb2HWqWCUoig06gd6kikoww6NYw6DYJ0GuShwuv3tVgEn2Q9XCU2tp64iDUHsvHUiA4w6jQwSIIbb1tJlFXI5/ryL/uxPO2sV/fw1vRvduF8oQlPfbcb/+jdslbfixoHv2Zu9Ho9evfujZSUFPGa1WpFSkoKkpOTXb72u+++g8lkwj333FPb0ySiWqZRqzAgKUrWPsHX1E7+trPv8QTIAwlbcGDfsNNTFVYrzLV8dsydH2/BpxtPYvGmkwCAIMmylLsDCQvLKpCRW7WDy2SWL0tVZ1u5t6qzbEfkit9PKJ4xYwYmTpyIPn36oF+/fliwYAGKi4sxadIkAMCECRMQHx+PuXPnyl732Wef4eabb0azZtXrHUNEjUu4UYeyCsczWuy7cwPy+g9bL6vqxiemCt/+4r5QZMK32zNwTftodI2PkD136kIxAPmylLtzc0Yu+AuZeVXdyu0Djbo404/1NuRrfg9u7rzzTpw/fx6zZ89GVlYWevbsiZUrV4pFxunp6VDb/ZPr8OHD2LhxI1avXu2PKRNRA/TxhD6Y/vUuPDeqk+y6u8yNjf1Jw54qc1KgWx2Xistx88JNOHOpFP9aeRgHXhkh29auufx3pVpSX9Qi0ujyntLApnK+dsENTyymBsjvwQ0ATJs2DdOmTVN8bt26dQ7XOnTowEifiLzSMyESG54Z6nBdqceToFCb46xHlDtlZt8FN/d/sQ1nLlUFI2fzStE2quocHv3lLJR0GczbpT77ZanqLsd5g3+bk6816N1SREQ1pdSdWylZUd0MRml5ZbCgUaswbWhSte5hsys9T/Z47cEcXCiuWmrTXs5CmSUFzN62c7Avrq6T4IbRDfkYgxsiatS0SpkbhV+20mtPXX+Fx/ffeTkg0WlUeGpEB4zs4nonqDNK2aR5vx/ChM+qevHZ6mWkmRtvgxv7IK4uAg+ln42oJhjcEFGjplhzo/DLVprBmHZte3w6Qd6fql10CD6+t7f4+NqOMYrvM++2btWap7Mg5VBWofh98eX+UzXJ3Njv7GLmhhoiBjdERBJhBq1iEYh9zU2YUV7Lcs9VrXF9lzgMTGqGtlEhDsFNYVll4BEZrK/WYYUnL++EcqXIZMafh3LEbBFQ8y7jzmqNzBYr9p7J90kXc8Y25GsMboioUZNuBf9H75b44ZEB6KnQT8pqt6M7OqzqcNBR3eIwITkRAPDl/f2xdsZghBjkpwQ3kZxw7OzMHVe2nHB/QnBxuRmTlmyTXbMIAswWK95efRh/H78ge25fZr7be9r/3DYv/3IAYz7YiDdXHXZ7D3eYuSFfqxe7pYiI/CWhSTCu7RCDJiF6TBrYBgDQNqqyEW9y26pztOyXZ6TBTf82zaC5XLtjOxdHbdfu4ZWxXcXvtdWJbjxQZHLcmWW2CFix9xze/+MY3v/jGE7NGy0+d+P7GxXvIwiC2End2c7UL7ecBgAsWn8cM2/oWMOZM7oh32LmhogaNZUKmHF9BzGwASp3HU1ITkT72DDxmv3yTKhki/UlhY7d0gDmn4PbYkyPFuJjtVJDKx8oKnNsD5GZV4r1h6s6kXtyGrC0v5Qvd46VVVgwY2kaftt7rlqv/To1HWftzuUhUsLghogaNRU8CzTsExgqSWYmr8QxqJAuSzUJlncVV9qh5QvFCpkbAFi2K1P8/vTFytodV7UyK/dlid8r1dxIr2m8+FmW/H0Ky3Zl4pGvdsque7Is9c7aI5i1bC9u+XCTx+9HjReDGyKiGooK1Ttc6yZpjWDfkdybgMAbtt1Srhw/X9krylUzz8e+SRO/V1qWkgZGOoX2Fc5cLHJsfwF4tii1en9lg+XsAuV7EEkxuCGiRmlAu8p6mvFXtar2Pf73YH/ce1Vr2ZKWTbPQqpqcIL28uFga7HSQLH3VVFG5++Dm4f/uxKGsAtnSk+K9LgdKShkeeXBT9WvkYpHJZUbI2XKcJyfOS9tYfLs9A7d+uAnnCxnokDIWFBNRo/Sf+/shp9CEFpFB1b7HgKQoDEiKcvr8x/f2xtqD2bjtypay69JzdL64vx8uFpsw+j3H4l6tWuVVR3FPdx3NXr4fH95zpcsxmZdK0SEuTLHmRrospdeocepCMYa8tQ4AMPiKaHxxfz+H15RVWPDv9SeU5+3BnEslwc0z3+8BALy9+jDm3dbdg1dTY8PMDRE1SlqNukaBjSeu7xKHf/2jh0PmRpo0iQjSoUuLCCh5+44eaN0s2OV76BUOIfSEq2UpANh8/AIycksUD/GTFhTrtWrM/nm/+Hj9kfMO4wFgwdqj1Zqnja2NhVSRB8tw1Dgxc0NEVMekyzBaFzUrbaJCsP7pofgpLVNWBwMAH46/EoIADO0YjZwCE8a8vxGFHv6yD9JrUGF2nS956ZcDeOmXAzDqHIMn+2UpT2p90jIuOX1Oud2FgJxCE2LDK7ua2/e8AuRF3URSzNwQEdUxaTbE1c4pW+FxiN7x36GD2kdhdPfmCNZrkRgV4lX372C9RrbM40pZhWNQIV2W0mlUsnYPADDu4y3Iyi+TXStRyLzYKGWHnvtxL/q/kYKfd591+jqGNuQMgxsiojomrWOxZR9Gd2vudLxeoXO5we6a/YnIrhh1GoxYsMHj8fYssmUpDSos8uBk84mL+O/lQ/5s7JeQtp3KdXleztepGQCAd9YccTqGiRtyhsENEZEHuresrIvpGFfz3U1KmYoFd/XEf+wKcW3DWjWtqruJCzfiithQh1qbUC8yNzWNCa59a734vV6jglmhR4N9JqmoTB7c3L5oM75KTa984GKFzNVcGduQM6y5ISLywMf39sFXW09jfP/WNb6XUnCj06jRr428oaZtWGJUCL6efBWaRxgR3yQIapXKod7Em2Wpmp6QLF3S0mnUYlNQKfvzb5Tqcr7fnoF7r2rtcrcUszNUHczcEBF5IC7CiCev74C4CGON7+WsGaVRp8Gvjw4SH0szNsntmiExKgQ6jVrxEMAgnefLUu52SnlDr1WjQuEHKrdYYbEK2HLiIsoqLChWqLnZfSYfbWetcLnryVXRMAuKyRlmboiI6phS5sama3wEUp8fBlOFFRGSTuLu6OyWqa6IDcWR7CLFsZ7sbvKURq1S3HmVU2DCpCXbsOHIeTw3ynljTXfH+HBZiqqDmRsiojrmKrgBgJgwIxKauj7fxp50S/n743rhF0kGCACmDm0nfu+sBxUAPDOyg1fva7EKigcNLvn7FDZcPvNG2qvKW/bd1WUY3ZATDG6IiOpYNZtquyTN3Oi1aujU8r/ew4w6PDH8CgBAsYs2Dd4eClgZ3Lhe5tqZnufVPaVUKuftGTxtekqND4MbIqI65kkvJW9JC3i1apVD0bAKVdkdZzUuo7rFOWwxd8diFVChcMCer6hUKoet5lXPuX5tSbkZqSdzXfa7qg6rVcDm4xeRr9ANnuoH1twQEdUxJ7+ra0QrybhonWRfbAcGligsS+2efT3CjFp8tyPDq/c1WwVU+DB4sFoFWWCmgnLzTttzrjz4xXb8ffwiXhjdCfGRQSgpt+C23i3dvMq9H3dl4snvdqN1s2Csf3poje9HvsfghoiojtVK5kYSEOgUdlOpVVXZnKyCMofnbcXLSgcGulJWYUG5DzM3ZqsAvWT+ajUUd2MBwJaTF3E0uxDtnXRW//v4RQDAF5tPISO3FABwdfsoxITXbMfbr3sqT00+fbGkRveh2sNlKSKiOuauoLg6pDU3SlvFVSrHs2eU6DWebykHgENZhV6Nd2fC4q2yJplqlQpmJ6mujNxSXPeO8knLO9OrelnlFVctHxVcPpMnI7cEP+464/KUZGdcFjlTvcDMDRFRHXNTf1st7palbuzeAimHst3ex9vMja9tOZGLdYdzxMcqwKF3lSdu/fBv8XtpQ9GyywcQ3vTBRlwqqUBhmRkTkhO9ujdjm/qPmRsiojpWO5kbleL3APDro4MQF2F02EGlxN/BDQBcLC6veqBSKW41l/Im+7JsZyYuFZfj0uVi4J/TnDfmpIaLmRsiojpWV8tSfzw5GBeKytE1PkJ23fV9/J+WOJtXKn5fmblx/edlMlsRpK9aTnNV07R400lsO5UrPs4udKw/cs//f0bkmv9DdCKiRmbate0BADf3bOGze2plmZvKv9rbRofK+lVpPQhcvN0Kbq9nQmSNXg8AH647Ln6vVjkvKLaR9roCgIJS1ycw783MF7/Pzjd5PT8uS9V/zNwQEdWxm3q0wJWtItEiIshn95QuOWmdZGjsWzQoqWmxrK/PlFGrVG7vOenzVESHGfDJhD5QqVQ4V1DqcrxUeTXqeWrYd5TqAIMbIiI/aNnEu/YK7ugUMjf2PFmWqmlo4sumnEBllsTdPXefqczEZBeYEBdhxPlC77MxNheLTMgrrUC76FDnc+KyVL3H4IaIKABo3WwFBzyrp6npGTzuin+9pXKxFdyeLQjKL/Xu5GDpwYG9X1sLAPjrmaEO/b3eXXsUJS5aV1D9weCGiCgAyNovOAliNB7slooJq9kBd8M7xaKswoIzlzxfGnIl9WQuXvxpn0djbctX7mpugMo/L1tbh0KTGRFB8g7suzLyZMGN2WLFO2uPAAC6tAj3aD7kPywoJiIKANJaGWdbvqUnF6+dcQ0SmjrW/CQ0DcZH46/E0yO86w5uE6zXYMPTQ/H5fX29bsLpzJ4z+e4Hoap+pqDMfeZG2q9KqUeUfXgoHX9JulW9BsoqLEjnKce1gsENEVEAUEmCG42TzI106cqg1TitHbmhW3MMviJafDyqWxyGdojG/x7sj0kDE93ORa1WYWjHGGycWbd9l2wH9Hm7LKU03n4hTFp47KzxqLcmLk7FNW/+iT1n8pyOEQQBq/Zn1csgaNH64/jgj6P+noYiBjdERAFAGqY4y9xIEylGncblrh+jrurcmIcHt8Pnk/phQFIU5ozp4vGcDF62cqipsorLmRsnwc3tTppmehIMmd0EN2+uOoTPN510uH6puBx/HT2veNDg1pOV5+3c9MEm7LrcLuLdtUfxXkpVwLD2YA7++eUOXPPmn27nWJdKys2Y9/shvLX6CC4UVb+Au7YwuCEiCjDOam6kv1+NOjUW3NULYUYtXru5q8NYo67q14P97qu5t3bzaB7ennb8n/v7eTXenslcmbmx9Y+y1yMhEi0iHGuK8krdLzNJl6Xs45RjOUVY+OdxvPzLAYfX3fj+Rtz7WSq+33nG5f1v+fBvXCouxztrj2D+miNiAJV68qLbuflDhbnqD8HTgu+6xOCGiCgASI+ncXbOjXRLtVGnQc+ESOyefT3uuaq1w1hp5sZ+l9W4fq2weda14mNnGSBvg5ur20d5Nd6eu8yNQatW7LuVX1qBpdvS8XVqutN7O9uObrUKskaf9n2wMi+ftrxizzkAwJHsQty8cBP+Onre4V7SpS/L5YDB17vPfEV6ynZ9PPeHu6WIiAKANLhROTmIT3oYni0bo3bym0l6UrHS71eDtir4CTVoFbMlGrUKHePCPO4c7mzenrLV3DgrKDboNIqB3670PHy/Q55Zsd8S7yy4qbBaZX/2JrNVMYCyBQOL1h1HWkYe7v0s1fkPIhnv60MRfUUa3NT0c6sNzNwQEQUATw6Waxqi9/h+0syN0i9YjeQXWqih6t/J4Ub5v5k/n9TX5fuM6BKLzs3D8d8H+ns8N2dW7s/Cp3+dkGVSpPQateIZQPaBjZIKJ0svZovgENw4GwcAoUbnOQVpwGDL2NTX4MZSC/3RfImZGyKiRqJLiwjMvrEzWjZx3/ZBWmdjf5gdIN+RFWLQYt6t3fDn4Rzc1a+VbJzWzdk6fVo3xb/vbet2Pp5YseecuPyjxKBTXpZyRhAEMSvhLHNjtgiymhNb3Y89W5Diqr2F9D628UpNVm3PeXLidG2RBl01PfixNjC4ISIKAJ6uDNw/qI3H99z14nUot1hlmRkb6fJOqFGLu/q1cghsAOVTkeMjg8RaFFeZDF8zaNVO65HsvZtyFP/3+yH8OHUgYsONLpelpM+ZKpTH2TIdzoIfQB5A2b63L9YVBAG3fvQ3ik1mrHzsaq+CNV8yuyiwrg+4LEVEFACUsis11SREj9hw5ROLpQf0ZeQ6P4NF6ZdvuOQ0YPvAKUhXe9vHDVqNx9mOE+eLcTa/DB/8cQyA82Upi1WQFQI7a8RpW2YqcxL82O5l/7398o/JbMXujDwcyylCuos/99omzSgpZZf8jcENEVEAuLJVE8y7tRu+nnxVnbyfWq1C2+gQAEC3+Ain45QyJfZZH6k/nxqCgUnNfDRLOW8yNzbfbs/A8PnrceCs8inJFRarLPBxlrmxisGNq8yNY82N/fk40t1Tzup7nPl97zkcyirw6jVSgiDgp7RM7MvMl82DwY0TCxcuRGJiIoxGI/r374/UVNdV5Hl5eZg6dSqaN28Og8GAK664Ar/99lsdzZaIqH66q18rJLerncBAyQ8PD8DDg9vhlbGO5+TYKHUol2ZP7DM3cRFGXNcp1neTlDBolQuKXTGZrTiWU4SXFM6wASqXZyokQYazZSezB8GN2Wp1+N5+K7j8vTwPbrafysWUr3Zi5IK/PH6Nvb2Z+XjsmzTc+P5G2bJUPYxt/B/cLF26FDNmzMCcOXOwc+dO9OjRAyNGjEBOTo7i+PLyclx33XU4deoUvv/+exw+fBiffPIJ4uPj63jmRESNW5MQPWbe0NHlkphSMKF1Edw4e40vGLQaWbA1TqFGyFtmqxUbJGfWOAs4LJeDFVfLUrLMjUW5oFhal+NNh3JPt+O7kifpwXX6YrH4PTM3CubPn4/Jkydj0qRJ6Ny5MxYtWoTg4GAsXrxYcfzixYuRm5uL5cuXY+DAgUhMTMTgwYPRo0ePOp45ERFVh9pNcGN/9k77mFC0bhbs9ZKSPYNOnrm5q28CJl/teYG1kgPnCvGfzafFx+M/3Yr1RxwP6LNYBQiCgJMXih2ek46x/96+oLhCMqbY5JgFWrU/C1tPOJ5qbP9ntzP9kuI4V6SB4bZTueL3LCi2U15ejh07dmD48OHiNbVajeHDh2Pz5s2Kr/n555+RnJyMqVOnIjY2Fl27dsUbb7wBi0U51WcymVBQUCD7IiIiP5L8MgxT2C2lsdv6dXX7aKx/eihGdo3z+C3UKmDq0HayYMa+5kanUSMq1ODFxB1N/3qXw7WJix1LK6wC8NnGk8gqKHN6L+npxrZlKftzbqTLUsWSHleCIOCVXw7gn1/uwJ0fb3HYni39cyirsODWD//GnR9vkd3DXsrBbPyUlik+ls4lt7gqi8PMjZ0LFy7AYrEgNla+vhobG4usrCzF15w4cQLff/89LBYLfvvtN7z44ot4++238dprrymOnzt3LiIiIsSvhIQEn/8cRETkXJhddqZUUncSopC5sW/bYOtz5c2v0CCdBk+P6Ihx/ar+ztfb1dzoNCrFmiBXgvWe7eYau3ATftl9Vnxstlrx2oqDLl8jzcrYMjb2u6WkdTnSBp6pJ3OxWNK40375S9pvzLYNHwBKnBx4KAgCHvhiOx77Jg3n8ksvz0+5tqg+nnPj92Upb1mtVsTExODjjz9G7969ceedd+L555/HokWLFMfPmjUL+fn54ldGRkYdz5iIqHHb+vwwbH+hKkMvDW6UgotR3ZrLHnvbowqobLUAABOTEwEAEUE6GLQa2S95nUYNnZf3Vso0KdmdkYdHJVmdjNxSF6MrlZulmRvlE4rLJQ0rl+/KxKXicgiCgHP58oyQfadz6eGB0i3kzrauS69fLKpsLGqRHVZY9Xx9XJby6yF+UVFR0Gg0yM7Oll3Pzs5GXJxy+rF58+bQ6XTQaKqi506dOiErKwvl5eXQ6+XHixsMBhgMNUs7EhFR9QXrtQjWV/266ZvYFGfzSpEUE6o43qjT4N/39sY/v9wBQNLHyotfosbLQUv72DCsfPxqaNUqaNQq2YnJWo0Keicd1J2R9tTyNWk2xFlwI83cbD99Cb1eXYPYcAP+eU072bi80nLESTqgS3/uk+er6n7KnRRAS6/bEjPSnVvlsuCm/kU3fs3c6PV69O7dGykpKeI1q9WKlJQUJCcnK75m4MCBOHbsGKySD/jIkSNo3ry5Q2BDRET1T6hBg50vXocfHxnodIw006B0yrE70t5YHePCkRQTBkBeWKvXqL1elnJ1wnBNSc/IsTiruVHItGQXmPDlltOya38fu4hhb6/Dqv2VJR7SjJW0qNlZcGMyK21LlxxWKA1uvDtup074fVlqxowZ+OSTT/DFF1/g4MGDmDJlCoqLizFp0iQAwIQJEzBr1ixx/JQpU5Cbm4vHHnsMR44cwYoVK/DGG29g6tSp/voRiIjIS0ad69OCpU/ZOpQLXqRuDE5OOpZ2sNZWK7ipvd/k0ntXKGwFt1oFpycl2/9JvvLrARw/Xyxmv6RyCquWsDwJbmz1OxbZAYJVQV59zNz4vbfUnXfeifPnz2P27NnIyspCz549sXLlSrHIOD09HWpJOi0hIQGrVq3CE088ge7duyM+Ph6PPfYYnn32WX/9CERE5GNqu11NgHeHxTmLWaQBklaj8jor5OoQvpqSBgz//HIHmkcYZbU0FkFw2uPKXVN4aWAircdxlomSBj2lFZWFy2YnNTf1MLbxf3ADANOmTcO0adMUn1u3bp3DteTkZGzZsqWWZ0VERP4iXZayFRR780s0QtK/SkZyj+osS7k6hK+m7LNC9kXCFqvz4MZdiCYNbqSH8R3OLsSjX+/CP69pi/sGVp35Iw16SsuVMjesuSEiIsI/B7dFdJgBk69p63asRuWYufFGTJhyw0/pr2Gt2vut4L4QE6a8ycXZEpGNVXC+LOWONAApkGRunv9xH87llzm0l5DW/9h2t1U4q7lhcENERI3VrBs6IfW5YU4DDylpzY1eoebm1bFd8MMU5Y0nABDtJICQ/iLWeBHcvHZzZf+st26v+Wn4SqcyA3B5oB7gOnPjbju2dEnJfpu4EulWcFtw47zmxu3t6ly9WJYiIqLGQaXyrMZFbberyd69l8+vsRcTZsDF4nLce1VrxeelSQaVSgW91rP53HNVa9zUswXCjTo89d1uj17jTJCTgwD/veGEy9dZhaogJVivkR3A5y7rIz0MsNjJwX1SssxNuWPNjXyreP2LbhjcEBFRvaNUc+OJj+65Eu1jwxBuVK65sf817M2ylLN7hhm0KHSTdZFSexjg2bNaBTGjEmLQyoIbs4v92LnF5dgu6QXlCaWam43HLojXyuv5IX5cliIionpHcVnKo1+iKqdBSOU95DdxFdwMaNdM8fqvjw5CkGSr+donB+O5UR09mVzlHLxqJFHFIgh4c9VhAECIXfbH/jwcqcFv/olvt5/x6r3ku6Us2HDkPP44lCNeY0ExERGRlxS3gvvgvo6ZG+dZlCWT+mF4p1jMukEeuHSNj8CN3ataRDQL0WNsz3gfzM61jNwSnC80AXAMylwtSxWWeZ5VspGfc2PBX0flnc6lpxXXx+CGy1JERFTvyJalXJxzEx1mEH/hA0CHuDCX97XP3Dg7NgaozBh9OrGP4nPSlSWNWoWYMAOGdogGADxx3RW46YNNLubgcopO5cnOp5FP3OzjtSHpslSxyYymIc47ANTD2IbBDRER1T8aWc2N8+zK0oeuwqcbT+L+gYmIDjM63YlkY/+L2OmheG6oJCfL2IqkP5/UDwBw+mKx4muczcFTl4rLxe/tDxOs7s/hjDQTdKmkHFFOdp8BlRml+obLUkREVO9IMyP6y42S7+qbAADomRApPtc2OhRv3NINSTFhzg/uk7APLDrEhaFNVAh6t25S7fnZ07opUq5uosPWnRuQd1YHUO3zb5yRZoZyCk0uDwmcuWyvLPCqD5i5ISKiekfWOPNy5mZ451isnXENWjYJrvZ97Yt5dRo11jxxDdQqFdo+95vH93EV3Ohc9MwCqr91etPxqt1Kplo8KRmQBzfnC01ud3gdzSlCvzZNa3VO3mDmhoiI6h155qbqV1VSTJis47e3lOIKrUYtK2D2jPPx9g1BJw1M9PLeytYdrirqLffxMpQ9aXBzocjkMpgD6t9ZNwxuiIio3pFubdZ5cc6NO3VxJot0WaprfDjmjOni0etUqsoWFZ4Y06NFtebmKWlBcYVFwPt/HHM5vr6ddcPghoiI6h1pcKN0QnH1+ea3sKtEj3R7uVHreZZJq1Z59LMmNgvG67d09fi+Sow6x/eRFiV7u+w17pMtKLt8Hs69n23FjKVpfs3mMLghIqJ6x1xLwU3ziCCnz0kP5vvqwf4u7+OyoFhdNV+DQhDh7He+Rq1CsN59KexTIzq4PKjQE0p1S+M/2Sp+X51lr5/SMvHZxpP46+gFLNuV6XGrjdrA4IaIiOodaebG+3oY5566vgPG9GiBzyf1dXiufWyo+P3ApCiX9+mb6Lx41lXmRqWSFzWvnTG46nVqNUIN7jM9HWJdn+XjiehQx63dqadycSynCG+tOoycApPCq1w7dbFE3MV1XefYGs+xJrhbioiI6p120SG1ct+IYB3eH9dL8bkPxl2JF37ah4c9qHu5qUcLCALQQ7It3UaasbDP3GhUKlnmJimmKqDSalQIcXNOj0atQmKU8z+bicmt8cXm0wCAebd2w8xlexXHxYQrn1sz6t2/ql2snHmpVDwf544+CdW6h68wuCEionqnWagB658e4tEyja+0ahaM/9zfz6OxKpUKN/dy33LBYJe5cbWlWqtROwQ3r47tghd/2i8+DtJpxNYLeq3aoe2CNPBJaOp8y7xS5gao2S6ss3lVwY03zU5rA4MbIiKql1o3q53sTV0y2P2SV6udlzRr1SqHE5btDwSUxkaRQTrkFMqXj1o3qwpo7N9bylnmpiZyi8vFJUTfFoF7jzU3REREtcR2Js/wTjEAgEkD2zgdq7QspbWrN5KeoRMZ7FhULC001mrUCNYr1/A0CXbeK6q68ksrmLkhIiIKdLbsyfvjrsSO05fQv21THM8pwrGcIoexWoWCYvvu39JlrVZNg3Eku+o+V7VtKsv0aNUqBOk0KCmXt2oI1mtqJfjIK60Q5+sqa1QXmLkhIiKqJYbLmZsgvQaD2kdBp1HjtVu64tYr4/HDlAGysVqFreD2px1LH74yVn7Wzb/v7SPL9Og0agTZZW4mJrfG1ueGOQRNvmCxCsgrrewx5e/MDYMbIiKiWqKUwYgJM2L+HT0dmnVq1I7LUvb1x9KdWC0ig/DMyA7i42C9Rha06DQqh2WpFpFBCDPqHJa7fKXs8uF/rLkhIiIKMLbA5SYv2iToNGqHgmJ79jGJWdINXKdRQ6uxy9zY9eGyZYKqm7m5o09Lj8b5O3PDmhsiIiIfW/rQVSg2WRChUPTrjFajgkatwi294vHjrkwAgMquQafGLpVjvxVcJzkdWatROSxL2Wp27Je7nFGp5Ccqe9pDyt/BDTM3REREPqbVqD0ObNpePrBw7OUsz1u393A61r6lQYXduTTSzI1WrXao4bE9Lx3niv2yWlmFxclIOX8HN8zcEBER+dGyKQOQlpGHq9tHA5BnVexrbtR2MUOFRZ5Kke6m0mlUePDqNvjjUI7D854uS+k1arGOBvAiuGHNDRERUeMVGazHkA4xiktF9lfsl6XsMzfSvlUqlQoD2kVh7q3dql5/+T08LSg22NXsXOFhXysGN0RERKTIIXNjd6FLi3DZY+khfsbLfa26togQr1UFN55nbmxeGtPZo5YTOo3Kp81Oq4PLUkRERPXYr48Owo3vbwTgGOzc3icBJrMV/dpUdikPMWjxw5QB0KhVYl+rtpImpIVlZgCe19xIO5zfN7ANzhe67xbu76wNwOCGiIio3uoQF442kmaY9pkbjVqFiQMSZdfsz8+Rnp1z6kIxAHnQ4kyf1k1QaldjExXqvm2Dv4uJAS5LERER1Tvrnx6CH6YMkAU2gOdbuO21bBIEALjmisqiZU+Wpb57ONnhmv1uLSX1Ibhh5oaIiKiead0sRNYV/dWxXfDW6iMut4m78uujg3Akuwh9EyuzOq6WpXomROLajjEeBTJKGNwQERGRW/cmJ+Keq1pXO+CIDNaLdTmA68zNwvFXIj6yMtOj9HYf39sbb68+gsPZhYqvt9X6+JP/wysiIiJyq7qBjRJp5mbRPb1xi2QXlLsWENd3icOqJ65BsxDl+hvbEpg/MbghIiJqZKRtGpoE69Ahrur8GnfBjY2zTgxJ0aE1mZpPMLghIiJqZKSZGwGARdI0ytOiZaugHN60j2VwQ0RERHXMPoCxetoRU0Ia20jPtokJN1Z7Xr7C4IaIiKiRse8t1b9tM8VxTpIzAOSZm/sGJorfB+v8X1DM3VJERESNjH3mpl+bpvjqwf5ItDtXxyVJ4CPtVWXfidwf/D8DIiIi8ruBSVEO11xt0JImdbSSTFCQ3v+LQgxuiIiIGqEbusbh5IVih3YNnpIuS+kkmZsgZm6IiIjIHz66pzcEQaj2+TnSehxpF/CgelBz4//cEREREflFTQ4GlGZuzJaq74P1DG6IiIioAZLW3JitVvF7Qz3oLeX/GRAREVG99PJNXQEAT153heOTkuimQpK58WWbiOqqF8HNwoULkZiYCKPRiP79+yM1NdXp2CVLlkClUsm+jEb/HxhEREQUaHq3boIjr92AR4e1d3hOuixVYbE6PO9Pfg9uli5dihkzZmDOnDnYuXMnevTogREjRiAnJ8fpa8LDw3Hu3Dnx6/Tp03U4YyIiosZD72SZSbYsxeBGbv78+Zg8eTImTZqEzp07Y9GiRQgODsbixYudvkalUiEuLk78io2NrcMZExERkSxzU432DbXJr8FNeXk5duzYgeHDh4vX1Go1hg8fjs2bNzt9XVFREVq3bo2EhASMHTsW+/fvdzrWZDKhoKBA9kVEREQ1Y+v+rVLJz7mpD/wa3Fy4cAEWi8Uh8xIbG4usrCzF13To0AGLFy/GTz/9hP/+97+wWq0YMGAAzpw5ozh+7ty5iIiIEL8SEhJ8/nMQERE1Np9M6IPR3Zvjl2mDMHVoEtpGh+D5UZ38PS0AgEoQXLXFql1nz55FfHw8/v77byQnJ4vXn3nmGaxfvx5bt251e4+Kigp06tQJ48aNw6uvvurwvMlkgslkEh8XFBQgISEB+fn5CA8P980PQkRERLWqoKAAERERHv3+9usJxVFRUdBoNMjOzpZdz87ORlxcnEf30Ol06NWrF44dO6b4vMFggMFgqPFciYiIqGHw67KUXq9H7969kZKSIl6zWq1ISUmRZXJcsVgs2Lt3L5o3b15b0yQiIqIGxO+9pWbMmIGJEyeiT58+6NevHxYsWIDi4mJMmjQJADBhwgTEx8dj7ty5AIBXXnkFV111FZKSkpCXl4c333wTp0+fxoMPPujPH4OIiIjqCb8HN3feeSfOnz+P2bNnIysrCz179sTKlSvFIuP09HSo1VUJpkuXLmHy5MnIyspCkyZN0Lt3b/z999/o3Lmzv34EIiIiqkf8WlDsD94UJBEREVH94M3vb78f4kdERETkSwxuiIiIKKAwuCEiIqKAwuCGiIiIAgqDGyIiIgooDG6IiIgooDC4ISIiooDC4IaIiIgCCoMbIiIiCih+b79Q12wHMhcUFPh5JkREROQp2+9tTxorNLrgprCwEACQkJDg55kQERGRtwoLCxEREeFyTKPrLWW1WnH27FmEhYVBpVL59N4FBQVISEhARkYG+1bVY/ycGgZ+Tg0HP6uGoaF/ToIgoLCwEC1atJA11FbS6DI3arUaLVu2rNX3CA8Pb5D/x2ls+Dk1DPycGg5+Vg1DQ/6c3GVsbFhQTERERAGFwQ0REREFFAY3PmQwGDBnzhwYDAZ/T4Vc4OfUMPBzajj4WTUMjelzanQFxURERBTYmLkhIiKigMLghoiIiAIKgxsiIiIKKAxuiIiIKKAwuPGRhQsXIjExEUajEf3790dqaqq/p9SozJ07F3379kVYWBhiYmJw88034/Dhw7IxZWVlmDp1Kpo1a4bQ0FDcdtttyM7Olo1JT0/H6NGjERwcjJiYGDz99NMwm811+aM0KvPmzYNKpcLjjz8uXuPnVD9kZmbinnvuQbNmzRAUFIRu3bph+/bt4vOCIGD27Nlo3rw5goKCMHz4cBw9elR2j9zcXIwfPx7h4eGIjIzEAw88gKKiorr+UQKaxWLBiy++iDZt2iAoKAjt2rXDq6++Kuu/1Cg/K4Fq7JtvvhH0er2wePFiYf/+/cLkyZOFyMhIITs7299TazRGjBghfP7558K+ffuEtLQ0YdSoUUKrVq2EoqIicczDDz8sJCQkCCkpKcL27duFq666ShgwYID4vNlsFrp27SoMHz5c2LVrl/Dbb78JUVFRwqxZs/zxIwW81NRUITExUejevbvw2GOPidf5Oflfbm6u0Lp1a+G+++4Ttm7dKpw4cUJYtWqVcOzYMXHMvHnzhIiICGH58uXC7t27hZtuuklo06aNUFpaKo4ZOXKk0KNHD2HLli3CX3/9JSQlJQnjxo3zx48UsF5//XWhWbNmwq+//iqcPHlS+O6774TQ0FDh3XffFcc0xs+KwY0P9OvXT5g6dar42GKxCC1atBDmzp3rx1k1bjk5OQIAYf369YIgCEJeXp6g0+mE7777Thxz8OBBAYCwefNmQRAE4bfffhPUarWQlZUljvnoo4+E8PBwwWQy1e0PEOAKCwuF9u3bC2vWrBEGDx4sBjf8nOqHZ599Vhg0aJDT561WqxAXFye8+eab4rW8vDzBYDAIX3/9tSAIgnDgwAEBgLBt2zZxzO+//y6oVCohMzOz9ibfyIwePVq4//77ZdduvfVWYfz48YIgNN7PistSNVReXo4dO3Zg+PDh4jW1Wo3hw4dj8+bNfpxZ45afnw8AaNq0KQBgx44dqKiokH1OHTt2RKtWrcTPafPmzejWrRtiY2PFMSNGjEBBQQH2799fh7MPfFOnTsXo0aNlnwfAz6m++Pnnn9GnTx/cfvvtiImJQa9evfDJJ5+Iz588eRJZWVmyzykiIgL9+/eXfU6RkZHo06ePOGb48OFQq9XYunVr3f0wAW7AgAFISUnBkSNHAAC7d+/Gxo0bccMNNwBovJ9Vo2uc6WsXLlyAxWKR/UULALGxsTh06JCfZtW4Wa1WPP744xg4cCC6du0KAMjKyoJer0dkZKRsbGxsLLKyssQxSp+j7TnyjW+++QY7d+7Etm3bHJ7j51Q/nDhxAh999BFmzJiB5557Dtu2bcP06dOh1+sxceJE8c9Z6XOQfk4xMTGy57VaLZo2bcrPyYdmzpyJgoICdOzYERqNBhaLBa+//jrGjx8PAI32s2JwQwFn6tSp2LdvHzZu3OjvqZCdjIwMPPbYY1izZg2MRqO/p0NOWK1W9OnTB2+88QYAoFevXti3bx8WLVqEiRMn+nl2JPXtt9/iq6++wv/+9z906dIFaWlpePzxx9GiRYtG/VlxWaqGoqKioNFoHHZzZGdnIy4uzk+zarymTZuGX3/9FX/++SdatmwpXo+Li0N5eTny8vJk46WfU1xcnOLnaHuOam7Hjh3IycnBlVdeCa1WC61Wi/Xr1+O9996DVqtFbGwsP6d6oHnz5ujcubPsWqdOnZCeng6g6s/Z1d97cXFxyMnJkT1vNpuRm5vLz8mHnn76acycORN33XUXunXrhnvvvRdPPPEE5s6dC6DxflYMbmpIr9ejd+/eSElJEa9ZrVakpKQgOTnZjzNrXARBwLRp0/Djjz/ijz/+QJs2bWTP9+7dGzqdTvY5HT58GOnp6eLnlJycjL1798r+I1+zZg3Cw8Md/qKn6hk2bBj27t2LtLQ08atPnz4YP368+D0/J/8bOHCgw1EKR44cQevWrQEAbdq0QVxcnOxzKigowNatW2WfU15eHnbs2CGO+eOPP2C1WtG/f/86+Ckah5KSEqjV8l/lGo0GVqsVQCP+rPxd0RwIvvnmG8FgMAhLliwRDhw4IDz00ENCZGSkbDcH1a4pU6YIERERwrp164Rz586JXyUlJeKYhx9+WGjVqpXwxx9/CNu3bxeSk5OF5ORk8XnbFuPrr79eSEtLE1auXClER0dzi3Etk+6WEgR+TvVBamqqoNVqhddff104evSo8NVXXwnBwcHCf//7X3HMvHnzhMjISOGnn34S9uzZI4wdO1Zxe3GvXr2ErVu3Chs3bhTat2/foLcX10cTJ04U4uPjxa3gy5YtE6KiooRnnnlGHNMYPysGNz7y/vvvC61atRL0er3Qr18/YcuWLf6eUqMCQPHr888/F8eUlpYKjzzyiNCkSRMhODhYuOWWW4Rz587J7nPq1CnhhhtuEIKCgoSoqCjhySefFCoqKur4p2lc7IMbfk71wy+//CJ07dpVMBgMQseOHYWPP/5Y9rzVahVefPFFITY2VjAYDMKwYcOEw4cPy8ZcvHhRGDdunBAaGiqEh4cLkyZNEgoLC+vyxwh4BQUFwmOPPSa0atVKMBqNQtu2bYXnn39edixCY/ysVIIgOcaQiIiIqIFjzQ0REREFFAY3REREFFAY3BAREVFAYXBDREREAYXBDREREQUUBjdEREQUUBjcEBERUUBhcENEREQBhcENEdWaxMRELFiwwOPx69atg0qlcmicGai8/fMhIs9o/T0BIqo/hgwZgp49e/rsF+62bdsQEhLi8fgBAwbg3LlziIiI8Mn7E1HjxOCGiLwiCAIsFgu0Wvd/fURHR3t1b71ej7i4uOpOjYgIAJeliOiy++67D+vXr8e7774LlUoFlUqFU6dOiUtFv//+O3r37g2DwYCNGzfi+PHjGDt2LGJjYxEaGoq+ffti7dq1snvaL7uoVCp8+umnuOWWWxAcHIz27dvj559/Fp+3X5ZasmQJIiMjsWrVKnTq1AmhoaEYOXIkzp07J77GbDZj+vTpiIyMRLNmzfDss89i4sSJuPnmm13+vBs3bsTVV1+NoKAgJCQkYPr06SguLpbN/dVXX8W4ceMQEhKC+Ph4LFy4UHaP9PR0jB07FqGhoQgPD8cdd9yB7Oxs2ZhffvkFffv2hdFoRFRUFG655RbZ8yUlJbj//vsRFhaGVq1a4eOPP3Y5byJyj8ENEQEA3n33XSQnJ2Py5Mk4d+4czp07h4SEBPH5mTNnYt68eTh48CC6d++OoqIijBo1CikpKdi1axdGjhyJMWPGID093eX7vPzyy7jjjjuwZ88ejBo1CuPHj0dubq7T8SUlJXjrrbfw5ZdfYsOGDUhPT8dTTz0lPv9///d/+Oqrr/D5559j06ZNKCgowPLly13O4fjx4xg5ciRuu+027NmzB0uXLsXGjRsxbdo02bg333wTPXr0wK5duzBz5kw89thjWLNmDQDAarVi7NixyM3Nxfr167FmzRqcOHECd955p/j6FStW4JZbbsGoUaOwa9cupKSkoF+/frL3ePvtt9GnTx/s2rULjzzyCKZMmYLDhw+7nD8RueHnruREVI8MHjxYeOyxx2TX/vzzTwGAsHz5crev79Kli/D++++Lj1u3bi2888474mMAwgsvvCA+LioqEgAIv//+u+y9Ll26JAiCIHz++ecCAOHYsWPiaxYuXCjExsaKj2NjY4U333xTfGw2m4VWrVoJY8eOdTrPBx54QHjooYdk1/766y9BrVYLpaWl4txHjhwpG3PnnXcKN9xwgyAIgrB69WpBo9EI6enp4vP79+8XAAipqamCIAhCcnKyMH78eKfzaN26tXDPPfeIj61WqxATEyN89NFHTl9DRO4xc0NEHunTp4/scVFREZ566il06tQJkZGRCA0NxcGDB91mbrp37y5+HxISgvDwcOTk5DgdHxwcjHbt2omPmzdvLo7Pz89Hdna2LBui0WjQu3dvl3PYvXs3lixZgtDQUPFrxIgRsFqtOHnypDguOTlZ9rrk5GQcPHgQAHDw4EEkJCTIsludO3dGZGSkOCYtLQ3Dhg1zORfpn4dKpUJcXJzLPw8ico8FxUTkEftdT0899RTWrFmDt956C0lJSQgKCsI//vEPlJeXu7yPTqeTPVapVLBarV6NFwTBy9nLFRUV4Z///CemT5/u8FyrVq1qdG+poKAgt2O8/fMgIveYuSEikV6vh8Vi8Wjspk2bcN999+GWW25Bt27dEBcXh1OnTtXuBO1EREQgNjYW27ZtE69ZLBbs3LnT5euuvPJKHDhwAElJSQ5fer1eHLdlyxbZ67Zs2YJOnToBADp16oSMjAxkZGSIzx84cAB5eXno3LkzgMqsTEpKSo1/TiLyDjM3RCRKTEzE1q1bcerUKYSGhqJp06ZOx7Zv3x7Lli3DmDFjoFKp8OKLL/ol4/Doo49i7ty5SEpKQseOHfH+++/j0qVLUKlUTl/z7LPP4qqrrsK0adPw4IMPIiQkBAcOHMCaNWvwwQcfiOM2bdqEf/3rX7j55puxZs0afPfdd1ixYgUAYPjw4ejWrRvGjx+PBQsWwGw245FHHsHgwYPFJbw5c+Zg2LBhaNeuHe666y6YzWb89ttvePbZZ2v3D4WokWPmhohETz31FDQaDTp37ozo6GiX9TPz589HkyZNMGDAAIwZMwYjRozAlVdeWYezrfTss89i3LhxmDBhApKTk8X6GaPR6PQ13bt3x/r163HkyBFcffXV6NWrF2bPno0WLVrIxj355JPYvn07evXqhddeew3z58/HiBEjAFQuH/30009o0qQJrrnmGgwfPhxt27bF0qVLxdcPGTIE3333HX7++Wf07NkT1157LVJTU2vnD4KIRCqhpovXRET1iNVqRadOnXDHHXfg1VdfrfZ9EhMT8fjjj+Pxxx/33eSIqE5wWYqIGrTTp09j9erVGDx4MEwmEz744AOcPHkSd999t7+nRkR+wmUpImrQ1Go1lixZgr59+2LgwIHYu3cv1q5dKxb+ElHjw2UpIiIiCijM3BAREVFAYXBDREREAYXBDREREQUUBjdEREQUUBjcEBERUUBhcENEREQBhcENERERBRQGN0RERBRQ/h9f6U3psFCS6AAAAABJRU5ErkJggg==",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "eval_loss = 0.5962\n"
     ]
    }
   ],
   "source": [
    "import torch\n",
    "from torch import nn\n",
    "\n",
    "class LR(nn.Module):\n",
    "    def __init__(self, input_dim, output_dim):\n",
    "        super(LR, self).__init__()\n",
    "        self.linear = nn.Linear(input_dim, output_dim)\n",
    "        \n",
    "    def forward(self, input_feats, labels=None):\n",
    "        outputs = self.linear(input_feats)\n",
    "        \n",
    "        if labels is not None:\n",
    "            loss_fc = nn.CrossEntropyLoss()\n",
    "            loss = loss_fc(outputs, labels)\n",
    "            return (loss, outputs)\n",
    "        \n",
    "        return outputs\n",
    "\n",
    "model = LR(len(dataset.token2id), len(dataset.label2id))\n",
    "\n",
    "from torch.utils.data import Dataset, DataLoader\n",
    "from torch.optim import SGD, Adam\n",
    "\n",
    "# 使用PyTorch的DataLoader来进行数据循环，因此按照PyTorch的接口\n",
    "# 实现myDataset和DataCollator两个类\n",
    "# myDataset是对特征向量和标签的简单封装便于对齐接口，\n",
    "# DataCollator用于批量将数据转化为PyTorch支持的张量类型\n",
    "class myDataset(Dataset):\n",
    "    def __init__(self, X, Y):\n",
    "        self.X = X\n",
    "        self.Y = Y\n",
    "        \n",
    "    def __len__(self):\n",
    "        return len(self.X)\n",
    "\n",
    "    def __getitem__(self, idx):\n",
    "        return (self.X[idx], self.Y[idx])\n",
    "\n",
    "class DataCollator:\n",
    "    @classmethod\n",
    "    def collate_batch(cls, batch):\n",
    "        feats, labels = [], []\n",
    "        for x, y in batch:\n",
    "            feats.append(x)\n",
    "            labels.append(y)\n",
    "        # 直接将一个ndarray的列表转化为张量是非常慢的，\n",
    "        # 所以需要提前将列表转化为一整个ndarray\n",
    "        feats = torch.tensor(np.array(feats), dtype=torch.float)\n",
    "        labels = torch.tensor(np.array(labels), dtype=torch.long)\n",
    "        return {'input_feats': feats, 'labels': labels}\n",
    "\n",
    "# 设置训练超参数和优化器，模型初始化\n",
    "epochs = 50\n",
    "batch_size = 128\n",
    "learning_rate = 1e-3\n",
    "weight_decay = 0\n",
    "\n",
    "train_dataset = myDataset(train_F, train_Y)\n",
    "test_dataset = myDataset(test_F, test_Y)\n",
    "\n",
    "data_collator = DataCollator()\n",
    "train_dataloader = DataLoader(train_dataset, batch_size=batch_size,\\\n",
    "    shuffle=True, collate_fn=data_collator.collate_batch)\n",
    "test_dataloader = DataLoader(test_dataset, batch_size=batch_size,\\\n",
    "    shuffle=False, collate_fn=data_collator.collate_batch)\n",
    "optimizer = Adam(model.parameters(), lr=learning_rate,\\\n",
    "    weight_decay=weight_decay)\n",
    "model.zero_grad()\n",
    "model.train()\n",
    "\n",
    "from tqdm import tqdm, trange\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# 模型训练\n",
    "with trange(epochs, desc='epoch', ncols=60) as pbar:\n",
    "    epoch_loss = []\n",
    "    for epoch in pbar:\n",
    "        model.train()\n",
    "        for step, batch in enumerate(train_dataloader):\n",
    "            loss = model(**batch)[0]\n",
    "            pbar.set_description(f'epoch-{epoch}, loss={loss.item():.4f}')\n",
    "            loss.backward()\n",
    "            optimizer.step()\n",
    "            model.zero_grad()\n",
    "            epoch_loss.append(loss.item())\n",
    "\n",
    "    epoch_loss = np.array(epoch_loss)\n",
    "    # 打印损失曲线\n",
    "    plt.plot(range(len(epoch_loss)), epoch_loss)\n",
    "    plt.xlabel('training epoch')\n",
    "    plt.ylabel('loss')\n",
    "    plt.show()\n",
    "    \n",
    "    model.eval()\n",
    "    with torch.no_grad():\n",
    "        loss_terms = []\n",
    "        for batch in test_dataloader:\n",
    "            loss = model(**batch)[0]\n",
    "            loss_terms.append(loss.item())\n",
    "        print(f'eval_loss = {np.mean(loss_terms):.4f}')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "10808854",
   "metadata": {},
   "source": [
    "下面的代码使用训练好的模型对测试集进行预测，并报告分类结果。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "11a9bf62",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "test example-0, prediction = 0, label = 0\n",
      "test example-1, prediction = 1, label = 1\n",
      "test example-2, prediction = 0, label = 0\n",
      "test example-3, prediction = 0, label = 0\n",
      "test example-4, prediction = 0, label = 0\n"
     ]
    }
   ],
   "source": [
    "LR_preds = []\n",
    "model.eval()\n",
    "for batch in test_dataloader:\n",
    "    with torch.no_grad():\n",
    "        _, preds = model(**batch)\n",
    "        preds = np.argmax(preds, axis=1)\n",
    "        LR_preds.extend(preds)\n",
    "            \n",
    "for i, (p, y) in enumerate(zip(LR_preds, test_Y)):\n",
    "    if i >= 5:\n",
    "        break\n",
    "    print(f'test example-{i}, prediction = {p}, label = {y}')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c5feb65e",
   "metadata": {},
   "source": [
    "下面的代码展示多分类情况下宏平均和微平均的算法。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "a5ac32c5",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NB: micro-f1 = 0.83493682624319, macro-f1 = 0.8303020425041593\n",
      "LR: micro-f1 = 0.8424713110003478, macro-f1 = 0.8382601222462621\n"
     ]
    }
   ],
   "source": [
    "test_Y = np.array(test_Y)\n",
    "NB_preds = np.array(NB_preds)\n",
    "LR_preds = np.array(LR_preds)\n",
    "\n",
    "def micro_f1(preds, labels):\n",
    "    TP = np.sum(preds == labels)\n",
    "    FN = FP = 0\n",
    "    for i in range(len(dataset.label2id)):\n",
    "        FN += np.sum((preds == i) & (labels != i))\n",
    "        FP += np.sum((preds != i) & (labels == i))\n",
    "    precision = TP / (TP + FP)\n",
    "    recall = TP / (TP + FN)\n",
    "    f1 = 2 * precision * recall / (precision + recall)\n",
    "    return f1\n",
    "\n",
    "def macro_f1(preds, labels):\n",
    "    f_scores = []\n",
    "    for i in range(len(dataset.label2id)):\n",
    "        TP = np.sum((preds == i) & (labels == i))\n",
    "        FN = np.sum((preds == i) & (labels != i))\n",
    "        FP = np.sum((preds != i) & (labels == i))\n",
    "        precision = TP / (TP + FP)\n",
    "        recall = TP / (TP + FN)\n",
    "        f1 = 2 * precision * recall / (precision + recall)\n",
    "        f_scores.append(f1)\n",
    "    return np.mean(f_scores)\n",
    "\n",
    "print(f'NB: micro-f1 = {micro_f1(NB_preds, test_Y)}, '+\\\n",
    "      f'macro-f1 = {macro_f1(NB_preds, test_Y)}')\n",
    "print(f'LR: micro-f1 = {micro_f1(LR_preds, test_Y)}, '+\\\n",
    "      f'macro-f1 = {macro_f1(LR_preds, test_Y)}')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
